PyPI - hossam - Versions diffs - 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl - Mend

hossam-0.4.2-py3-none-any.whl → hossam-0.4.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

hossam/hs_plot.py +160 -37
hossam/hs_prep.py +37 -5
hossam/hs_stats.py +149 -16
{hossam-0.4.2.dist-info → hossam-0.4.4.dist-info}/METADATA +1 -1
{hossam-0.4.2.dist-info → hossam-0.4.4.dist-info}/RECORD +8 -8
{hossam-0.4.2.dist-info → hossam-0.4.4.dist-info}/WHEEL +0 -0
{hossam-0.4.2.dist-info → hossam-0.4.4.dist-info}/licenses/LICENSE +0 -0
{hossam-0.4.2.dist-info → hossam-0.4.4.dist-info}/top_level.txt +0 -0

hossam/hs_plot.py CHANGED Viewed

@@ -35,12 +35,12 @@ if pd.__version__ > "2.0.0":
 config = SimpleNamespace(
     dpi=200,
-    width=800,
-    height=520,
-    font_size=10,
+    width=600,
+    height=350,
+    font_size=7,
     font_weight="normal",
-    frame_width=0.7,
-    line_width=1.5,
+    frame_width=0.5,
+    line_width=1,
     grid_alpha=0.3,
     grid_width=0.5,
     fill_alpha=0.3
@@ -49,7 +49,7 @@ config = SimpleNamespace(
 # ===================================================================
 # 기본 크기가 설정된 Figure와 Axes를 생성한다
 # ===================================================================
-def get_default_ax(width: int = config.width, height: int = config.height, rows: int = 1, cols: int = 1, dpi: int = config.dpi, flatten: bool = False, ws: int | None = None, hs: int | None = None):
+def get_default_ax(width: int = config.width, height: int = config.height, rows: int = 1, cols: int = 1, dpi: int = config.dpi, flatten: bool = False, ws: int | None = None, hs: int | None = None, title: str = None):
     """기본 크기의 Figure와 Axes를 생성한다.
     Args:
@@ -61,6 +61,7 @@ def get_default_ax(width: int = config.width, height: int = config.height, rows:
         flatten (bool): Axes 배열을 1차원 리스트로 평탄화할지 여부.
         ws (int|None): 서브플롯 가로 간격(`wspace`). rows/cols가 1보다 클 때만 적용.
         hs (int|None): 서브플롯 세로 간격(`hspace`). rows/cols가 1보다 클 때만 적용.
+        title (str|None): Figure 제목.
     Returns:
         tuple[Figure, Axes]: 생성된 matplotlib Figure와 Axes 객체.
@@ -68,9 +69,15 @@ def get_default_ax(width: int = config.width, height: int = config.height, rows:
     figsize = (width * cols / 100, height * rows / 100)
     fig, ax = plt.subplots(rows, cols, figsize=figsize, dpi=dpi)
-    if (rows > 1 or cols > 1) and (ws != None and hs != None):
+    # ax가 배열 (subplots)인지 단일 Axes인지 확인
+    is_array = isinstance(ax, (np.ndarray, list))
+    if is_array and (ws != None and hs != None):
         fig.subplots_adjust(wspace=ws, hspace=hs)
+    if title and not is_array:
+        fig.suptitle(title, fontsize=config.font_size * 1.5, fontweight='bold')
     if flatten == True:
         # 단일 Axes인 경우 리스트로 변환
         if rows == 1 and cols == 1:
@@ -94,19 +101,43 @@ def get_default_ax(width: int = config.width, height: int = config.height, rows:
     return fig, ax
+# ===================================================================
+# 기본 크기가 설정된 Figure와 Axes를 생성한다
+# ===================================================================
+def create_figure(width: int = config.width, height: int = config.height, rows: int = 1, cols: int = 1, dpi: int = config.dpi, flatten: bool = False, ws: int | None = None, hs: int | None = None, title: str = None):
+    """기본 크기의 Figure와 Axes를 생성한다. get_default_ax의 래퍼 함수.
+    Args:
+        width (int): 가로 픽셀 크기.
+        height (int): 세로 픽셀 크기.
+        rows (int): 서브플롯 행 개수.
+        cols (int): 서브플롯 열 개수.
+        dpi (int): 해상도(DPI).
+        flatten (bool): Axes 배열을 1차원 리스트로 평탄화할지 여부.
+        ws (int|None): 서브플롯 가로 간격(`wspace`). rows/cols가 1보다 클 때만 적용.
+        hs (int|None): 서브플롯 세로 간격(`hspace`). rows/cols가 1보다 클 때만 적용.
+        title (str): Figure 제목.
+    Returns:
+        tuple[Figure, Axes]: 생성된 matplotlib Figure와 Axes 객체.
+    """
+    fig, ax = get_default_ax(width, height, rows, cols, dpi, flatten, ws, hs, title)
+    return fig, ax
 # ===================================================================
 # 그래프의 그리드, 레이아웃을 정리하고 필요 시 저장 또는 표시한다
 # ===================================================================
-def finalize_plot(ax: Axes, callback: any = None, outparams: bool = False, save_path: str = None, grid: bool = True) -> None:
+def finalize_plot(ax: Axes | np.ndarray, callback: any = None, outparams: bool = False, save_path: str = None, grid: bool = True, title: str = None) -> None:
     """공통 후처리를 수행한다: 콜백 실행, 레이아웃 정리, 필요 시 표시/종료.
     Args:
-        ax (Axes|ndarray|list): 대상 Axes (단일 Axes 또는 subplots 배열).
+        ax (Axes|np.ndarray): 대상 Axes (단일 Axes 또는 subplots 배열).
         callback (Callable|None): 추가 설정을 위한 사용자 콜백.
         outparams (bool): 내부에서 생성한 Figure인 경우 True.
         save_path (str|None): 이미지 저장 경로. None이 아니면 해당 경로로 저장.
         grid (bool): 그리드 표시 여부. 기본값은 True입니다.
+        title (str|None): 그래프 제목.
     Returns:
         None
     """
@@ -131,6 +162,9 @@ def finalize_plot(ax: Axes, callback: any = None, outparams: bool = False, save_
     plt.tight_layout()
+    if title and not is_array:
+        ax.set_title(title, fontsize=config.font_size * 1.3, pad=7, fontweight='bold')
     if save_path is not None:
         plt.savefig(save_path, dpi=config.dpi * 2, bbox_inches='tight')
@@ -139,6 +173,27 @@ def finalize_plot(ax: Axes, callback: any = None, outparams: bool = False, save_
         plt.close()
+# ===================================================================
+# 그래프의 그리드, 레이아웃을 정리하고 필요 시 저장 또는 표시한다
+# ===================================================================
+def show_figure(ax: Axes | np.ndarray, callback: any = None, outparams: bool = False, save_path: str = None, grid: bool = True, title: str = None) -> None:
+    """공통 후처리를 수행한다: 콜백 실행, 레이아웃 정리, 필요 시 표시/종료.
+    finalize_plot의 래퍼 함수.
+    Args:
+        ax (Axes|np.ndarray): 대상 Axes (단일 Axes 또는 subplots 배열).
+        callback (Callable|None): 추가 설정을 위한 사용자 콜백.
+        outparams (bool): 내부에서 생성한 Figure인 경우 True.
+        save_path (str|None): 이미지 저장 경로. None이 아니면 해당 경로로 저장.
+        grid (bool): 그리드 표시 여부. 기본값은 True입니다.
+        title (str|None): 그래프 제목.
+    Returns:
+        None
+    """
+    finalize_plot(ax, callback, outparams, save_path, grid, title)
 # ===================================================================
 # 선 그래프를 그린다
 # ===================================================================
@@ -147,6 +202,7 @@ def lineplot(
     xname: str = None,
     yname: str = None,
     hue: str = None,
+    title: str | None = None,
     marker: str = None,
     palette: str = None,
     width: int = config.width,
@@ -165,6 +221,7 @@ def lineplot(
         xname (str|None): x축 컬럼명.
         yname (str|None): y축 컬럼명.
         hue (str|None): 범주 구분 컬럼명.
+        title (str|None): 그래프 제목.
         marker (str|None): 마커 모양.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
@@ -203,7 +260,7 @@ def lineplot(
     lineplot_kwargs.update(params)
     sb.lineplot(**lineplot_kwargs, linewidth=linewidth)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -213,6 +270,7 @@ def boxplot(
     df: DataFrame,
     xname: str = None,
     yname: str = None,
+    title: str | None = None,
     orient: str = "v",
     palette: str = None,
     width: int = config.width,
@@ -230,6 +288,7 @@ def boxplot(
         df (DataFrame): 시각화할 데이터.
         xname (str|None): x축 범주 컬럼명.
         yname (str|None): y축 값 컬럼명.
+        title (str|None): 그래프 제목.
         orient (str): 'v' 또는 'h' 방향.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
@@ -272,7 +331,7 @@ def boxplot(
     else:
         sb.boxplot(data=df, orient=orient, ax=ax, linewidth=linewidth, **params)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -283,6 +342,7 @@ def kdeplot(
     xname: str = None,
     yname: str = None,
     hue: str = None,
+    title: str | None = None,
     palette: str = None,
     fill: bool = False,
     fill_alpha: float = config.fill_alpha,
@@ -306,6 +366,7 @@ def kdeplot(
         xname (str|None): x축 컬럼명.
         yname (str|None): y축 컬럼명.
         hue (str|None): 범주 컬럼명.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         fill (bool): 면적 채우기 여부.
         fill_alpha (float): 채움 투명도.
@@ -369,7 +430,7 @@ def kdeplot(
             axes[idx].set_title(f"Q{idx+1}: [{lo:.3g}, {hi:.3g}]")
             axes[idx].grid(True, alpha=config.grid_alpha, linewidth=config.grid_width)
-        finalize_plot(axes[0], callback, outparams, save_path)
+        finalize_plot(axes[0], callback, outparams, save_path, True, title)
         return
     if ax is None:
@@ -403,7 +464,7 @@ def kdeplot(
     sb.kdeplot(**kdeplot_kwargs)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -412,8 +473,9 @@ def kdeplot(
 def histplot(
     df: DataFrame,
     xname: str,
-    hue=None,
-    bins=None,
+    hue: str | None = None,
+    title: str | None = None,
+    bins: int | None = None,
     kde: bool = True,
     palette: str = None,
     width: int = config.width,
@@ -431,6 +493,7 @@ def histplot(
         df (DataFrame): 시각화할 데이터.
         xname (str): 히스토그램 대상 컬럼명.
         hue (str|None): 범주 컬럼명.
+        title (str|None): 그래프 제목.
         bins (int|sequence|None): 구간 수 또는 경계.
         kde (bool): KDE 표시 여부.
         palette (str|None): 팔레트 이름.
@@ -487,7 +550,7 @@ def histplot(
         histplot_kwargs.update(params)
         sb.histplot(**histplot_kwargs)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -497,6 +560,7 @@ def stackplot(
     df: DataFrame,
     xname: str,
     hue: str,
+    title: str | None = None,
     palette: str = None,
     width: int = config.width,
     height: int = config.height,
@@ -513,6 +577,7 @@ def stackplot(
         df (DataFrame): 시각화할 데이터.
         xname (str): x축 기준 컬럼.
         hue (str): 클래스 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
@@ -571,7 +636,7 @@ def stackplot(
         ax.set_xticks(xticks)
         ax.set_xticklabels(xticks)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -582,6 +647,7 @@ def scatterplot(
     xname: str,
     yname: str,
     hue=None,
+    title: str | None = None,
     palette: str = None,
     width: int = config.width,
     height: int = config.height,
@@ -599,6 +665,7 @@ def scatterplot(
         xname (str): x축 컬럼.
         yname (str): y축 컬럼.
         hue (str|None): 범주 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
@@ -636,7 +703,7 @@ def scatterplot(
     sb.scatterplot(**scatterplot_kwargs)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -646,6 +713,7 @@ def regplot(
     df: DataFrame,
     xname: str,
     yname: str,
+    title: str | None = None,
     palette: str = None,
     width: int = config.width,
     height: int = config.height,
@@ -662,6 +730,7 @@ def regplot(
         df (DataFrame): 시각화할 데이터.
         xname (str): 독립변수 컬럼.
         yname (str): 종속변수 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 선/점 색상.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
@@ -702,7 +771,7 @@ def regplot(
     sb.regplot(**regplot_kwargs)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -713,6 +782,7 @@ def lmplot(
     xname: str,
     yname: str,
     hue=None,
+    title: str | None = None,
     palette: str = None,
     width: int = config.width,
     height: int = config.height,
@@ -728,6 +798,7 @@ def lmplot(
         xname (str): 독립변수 컬럼.
         yname (str): 종속변수 컬럼.
         hue (str|None): 범주 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
@@ -766,6 +837,9 @@ def lmplot(
     g.fig.grid(True, alpha=config.grid_alpha, linewidth=config.grid_width)
+    if title:
+        g.fig.suptitle(title, fontsize=config.font_size * 1.5, fontweight='bold')
     plt.tight_layout()
     if save_path is not None:
@@ -781,6 +855,7 @@ def lmplot(
 def pairplot(
     df: DataFrame,
     xnames=None,
+    title: str | None = None,
     diag_kind: str = "kde",
     hue=None,
     palette: str = None,
@@ -800,6 +875,7 @@ def pairplot(
             - str: 해당 컬럼에 대해서만 처리.
             - list: 주어진 컬럼들에 대해서만 처리.
             기본값은 None.
+        title (str|None): 그래프 제목.
         diag_kind (str): 대각선 플롯 종류('kde' 등).
         hue (str|None): 범주 컬럼.
         palette (str|None): 팔레트 이름.
@@ -852,6 +928,10 @@ def pairplot(
     scale = len(target_cols)
     g.fig.set_size_inches(w=(width / dpi) * scale, h=(height / dpi) * scale)
     g.fig.set_dpi(dpi)
+    if title:
+        g.fig.suptitle(title, fontsize=config.font_size * 1.5, fontweight='bold')
     g.map_lower(func=sb.kdeplot, fill=True, alpha=config.fill_alpha, linewidth=linewidth)
     g.map_upper(func=sb.scatterplot, linewidth=linewidth)
@@ -876,6 +956,7 @@ def countplot(
     df: DataFrame,
     xname: str,
     hue=None,
+    title: str | None = None,
     palette: str = None,
     order: int = 1,
     width: int = config.width,
@@ -893,6 +974,7 @@ def countplot(
         df (DataFrame): 시각화할 데이터.
         xname (str): 범주 컬럼.
         hue (str|None): 보조 범주 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         order (int): 숫자형일 때 정렬 방식(1: 값 기준, 기타: 빈도 기준).
         width (int): 캔버스 가로 픽셀.
@@ -938,7 +1020,7 @@ def countplot(
     sb.countplot(**countplot_kwargs)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -949,6 +1031,7 @@ def barplot(
     xname: str,
     yname: str,
     hue=None,
+    title: str | None = None,
     palette: str = None,
     width: int = config.width,
     height: int = config.height,
@@ -966,6 +1049,7 @@ def barplot(
         xname (str): 범주 컬럼.
         yname (str): 값 컬럼.
         hue (str|None): 보조 범주 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
@@ -1002,7 +1086,7 @@ def barplot(
     barplot_kwargs.update(params)
     sb.barplot(**barplot_kwargs)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -1013,6 +1097,7 @@ def boxenplot(
     xname: str,
     yname: str,
     hue=None,
+    title: str | None = None,
     palette: str = None,
     width: int = config.width,
     height: int = config.height,
@@ -1030,6 +1115,7 @@ def boxenplot(
         xname (str): 범주 컬럼.
         yname (str): 값 컬럼.
         hue (str|None): 보조 범주 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
@@ -1064,7 +1150,7 @@ def boxenplot(
     boxenplot_kwargs.update(params)
     sb.boxenplot(**boxenplot_kwargs)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -1075,6 +1161,7 @@ def violinplot(
     xname: str,
     yname: str,
     hue=None,
+    title: str | None = None,
     palette: str = None,
     width: int = config.width,
     height: int = config.height,
@@ -1092,6 +1179,7 @@ def violinplot(
         xname (str): 범주 컬럼.
         yname (str): 값 컬럼.
         hue (str|None): 보조 범주 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
@@ -1125,7 +1213,7 @@ def violinplot(
     violinplot_kwargs.update(params)
     sb.violinplot(**violinplot_kwargs)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -1136,6 +1224,7 @@ def pointplot(
     xname: str,
     yname: str,
     hue=None,
+    title: str | None = None,
     palette: str = None,
     width: int = config.width,
     height: int = config.height,
@@ -1153,6 +1242,7 @@ def pointplot(
         xname (str): 범주 컬럼.
         yname (str): 값 컬럼.
         hue (str|None): 보조 범주 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
@@ -1188,7 +1278,7 @@ def pointplot(
     pointplot_kwargs.update(params)
     sb.pointplot(**pointplot_kwargs)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -1199,6 +1289,7 @@ def jointplot(
     xname: str,
     yname: str,
     hue=None,
+    title: str | None = None,
     palette: str = None,
     width: int = config.width,
     height: int = config.height,
@@ -1214,6 +1305,7 @@ def jointplot(
         xname (str): x축 컬럼.
         yname (str): y축 컬럼.
         hue (str|None): 범주 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
@@ -1243,6 +1335,9 @@ def jointplot(
     g.fig.set_size_inches(width / dpi, height / dpi)
     g.fig.set_dpi(dpi)
+    if title:
+        g.fig.suptitle(title, fontsize=config.font_size * 1.5, fontweight='bold')
     # 중앙 및 주변 플롯에 grid 추가
     g.ax_joint.grid(True, alpha=config.grid_alpha, linewidth=config.grid_width)
     g.ax_marg_x.grid(True, alpha=config.grid_alpha, linewidth=config.grid_width)
@@ -1262,6 +1357,7 @@ def jointplot(
 # ===================================================================
 def heatmap(
     data: DataFrame,
+    title: str | None = None,
     palette: str = None,
     width: int | None = None,
     height: int | None = None,
@@ -1276,6 +1372,7 @@ def heatmap(
     Args:
         data (DataFrame): 행렬 형태 데이터.
+        title (str|None): 그래프 제목.
         palette (str|None): 컬러맵 이름.
         width (int|None): 캔버스 가로 픽셀. None이면 자동 계산.
         height (int|None): 캔버스 세로 픽셀. None이면 자동 계산.
@@ -1313,7 +1410,7 @@ def heatmap(
     # heatmap은 hue를 지원하지 않으므로 cmap에 palette 사용
     sb.heatmap(**heatmatp_kwargs)
-    finalize_plot(ax, callback, outparams, save_path, False)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -1324,6 +1421,7 @@ def convex_hull(
     xname: str,
     yname: str,
     hue: str,
+    title: str | None = None,
     palette: str = None,
     width: int = config.width,
     height: int = config.height,
@@ -1341,6 +1439,7 @@ def convex_hull(
         xname (str): x축 컬럼.
         yname (str): y축 컬럼.
         hue (str): 클러스터/범주 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
@@ -1385,7 +1484,7 @@ def convex_hull(
     sb.scatterplot(
         data=data, x=xname, y=yname, hue=hue, palette=palette, ax=ax, **params
     )
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -1394,10 +1493,12 @@ def convex_hull(
 def kde_confidence_interval(
     data: DataFrame,
     xnames=None,
+    title: str | None = None,
     clevel=0.95,
     width: int = config.width,
     height: int = config.height,
     linewidth: float = config.line_width,
+    fill: bool = False,
     dpi: int = config.dpi,
     save_path: str = None,
     callback: any = None,
@@ -1412,10 +1513,12 @@ def kde_confidence_interval(
             - str: 해당 컬럼에 대해서만 처리.
             - list: 주어진 컬럼들에 대해서만 처리.
             기본값은 None.
+        title (str|None): 그래프 제목.
         clevel (float): 신뢰수준(0~1).
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
         linewidth (float): 선 굵기.
+        fill (bool): KDE 채우기 여부.
         dpi (int): 그림 크기 및 해상도.
         callback (Callable|None): Axes 후처리 콜백.
         ax (Axes|None): 외부에서 전달한 Axes.
@@ -1469,7 +1572,7 @@ def kde_confidence_interval(
         cmin, cmax = t.interval(clevel, dof, loc=sample_mean, scale=sample_std_error)
         # 현재 컬럼에 대한 커널밀도추정
-        sb.kdeplot(data=column, linewidth=linewidth, ax=current_ax)
+        sb.kdeplot(data=column, linewidth=linewidth, ax=current_ax, fill=fill, alpha=config.fill_alpha)
         # 그래프 축의 범위
         xmin, xmax, ymin, ymax = current_ax.get_position().bounds
@@ -1494,7 +1597,7 @@ def kde_confidence_interval(
         current_ax.grid(True, alpha=config.grid_alpha, linewidth=config.grid_width)
-    finalize_plot(axes[0] if isinstance(axes, list) and len(axes) > 0 else ax, callback, outparams, save_path)
+    finalize_plot(axes[0] if isinstance(axes, list) and len(axes) > 0 else ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -1504,6 +1607,7 @@ def pvalue1_anotation(
     data: DataFrame,
     target: str,
     hue: str,
+    title: str | None = None,
     pairs: list = None,
     test: str = "t-test_ind",
     text_format: str = "star",
@@ -1523,6 +1627,7 @@ def pvalue1_anotation(
         data (DataFrame): 시각화할 데이터.
         target (str): 값 컬럼명.
         hue (str): 그룹 컬럼명.
+        title (str|None): 그래프 제목.
         pairs (list|None): 비교할 (group_a, group_b) 튜플 목록. None이면 hue 컬럼의 모든 고유값 조합을 자동 생성.
         test (str): 적용할 통계 검정 이름.
         text_format (str): 주석 형식('star' 등).
@@ -1574,7 +1679,7 @@ def pvalue1_anotation(
     annotator.apply_and_annotate()
     sb.despine()
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
@@ -1583,6 +1688,7 @@ def pvalue1_anotation(
 # ===================================================================
 def ols_residplot(
     fit,
+    title: str | None = None,
     lowess: bool = False,
     mse: bool = False,
     width: int = config.width,
@@ -1603,6 +1709,7 @@ def ols_residplot(
     Args:
         fit: 회귀 모형 객체 (statsmodels의 RegressionResultsWrapper).
              fit.resid와 fit.fittedvalues를 통해 잔차와 적합값을 추출한다.
+        title (str|None): 그래프 제목.
         lowess (bool): LOWESS 스무딩 적용 여부.
         mse (bool): √MSE, 2√MSE, 3√MSE 대역선과 비율 표시 여부.
         width (int): 캔버스 가로 픽셀.
@@ -1632,7 +1739,7 @@ def ols_residplot(
     y = y_pred + resid  # 실제값 = 적합값 + 잔차
     if ax is None:
-        fig, ax = get_default_ax(width, height, 1, 1, dpi)
+        fig, ax = get_default_ax(width + 150 if mse else width, height, 1, 1, dpi)
         outparams = True
     # 산점도 직접 그리기 (seaborn.residplot보다 훨씬 빠름)
@@ -1702,7 +1809,7 @@ def ols_residplot(
                 color=c,
             )
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -1710,6 +1817,7 @@ def ols_residplot(
 # ===================================================================
 def ols_qqplot(
     fit,
+    title: str | None = None,
     line: str = 's',
     width: int = config.width,
     height: int = config.height,
@@ -1728,6 +1836,7 @@ def ols_qqplot(
     Args:
         fit: 회귀 모형 객체 (statsmodels의 RegressionResultsWrapper 등).
              fit.resid 속성을 통해 잔차를 추출하여 정규성을 확인한다.
+        title (str|None): 그래프 제목.
         line (str): 참조선의 유형. 기본값 's' (standardized).
                     - 's': 표본의 표준편차와 평균을 기반으로 조정된 선 (권장)
                     - 'r': 실제 점들에 대한 회귀선 (데이터 추세 반영)
@@ -1788,7 +1897,7 @@ def ols_qqplot(
         if line.get_linestyle() == '--' or line.get_color() == 'r':
             line.set_linewidth(linewidth)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -1796,6 +1905,7 @@ def ols_qqplot(
 # ===================================================================
 def distribution_by_class(
     data: DataFrame,
+    title: str | None = None,
     xnames: list = None,
     hue: str = None,
     type: str = "kde",
@@ -1815,6 +1925,7 @@ def distribution_by_class(
         data (DataFrame): 시각화할 데이터.
         xnames (list|None): 대상 컬럼 목록(None이면 전 컬럼).
         hue (str|None): 클래스 컬럼.
+        title (str|None): 그래프 제목.
         type (str): 'kde' | 'hist' | 'histkde'.
         bins (int|sequence|None): 히스토그램 구간.
         palette (str|None): 팔레트 이름.
@@ -1897,6 +2008,7 @@ def scatter_by_class(
     yname: str,
     group: list | None = None,
     hue: str | None = None,
+    title: str | None = None,
     palette: str | None = None,
     outline: bool = False,
     width: int = config.width,
@@ -1913,6 +2025,7 @@ def scatter_by_class(
         yname (str): 종속변수 컬럼명(필수).
         group (list|None): x 컬럼 목록 또는 [[x, y], ...] 형태. None이면 자동 생성.
         hue (str|None): 클래스 컬럼.
+        title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         outline (bool): 볼록 껍질을 표시할지 여부.
         width (int): 캔버스 가로 픽셀.
@@ -1968,6 +2081,7 @@ def categorical_target_distribution(
     data: DataFrame,
     yname: str,
     hue: list | str | None = None,
+    title: str | None = None,
     kind: str = "box",
     kde_fill: bool = True,
     palette: str | None = None,
@@ -1985,6 +2099,7 @@ def categorical_target_distribution(
         data (DataFrame): 시각화할 데이터.
         yname (str): 종속변수 컬럼명(연속형 추천).
         hue (list|str|None): 명목형 독립변수 목록. None이면 자동 탐지.
+        title (str|None): 그래프 제목.
         kind (str): 'box', 'violin', 'kde'.
         kde_fill (bool): kind='kde'일 때 영역 채우기 여부.
         palette (str|None): 팔레트 이름.
@@ -2043,7 +2158,7 @@ def categorical_target_distribution(
     for j in range(n_plots, len(axes)):
         axes[j].set_visible(False)
-    finalize_plot(axes[0], callback, outparams, save_path)
+    finalize_plot(axes[0], callback, outparams, save_path, True, title)
 # ===================================================================
@@ -2053,6 +2168,7 @@ def roc_curve_plot(
     fit,
     y: np.ndarray | pd.Series = None,
     X: pd.DataFrame | np.ndarray = None,
+    title: str | None = None,
     width: int = config.height,
     height: int = config.height,
     linewidth: float = config.line_width,
@@ -2067,6 +2183,7 @@ def roc_curve_plot(
         fit: statsmodels Logit 결과 객체 (`fit.predict()`로 예측 확률을 계산 가능해야 함).
         y (array-like|None): 외부 데이터의 실제 레이블. 제공 시 이를 실제값으로 사용.
         X (array-like|None): 외부 데이터의 설계행렬(독립변수). 제공 시 해당 데이터로 예측 확률 계산.
+        title (str|None): 그래프 제목.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
         linewidth (float): 선 굵기.
@@ -2113,7 +2230,7 @@ def roc_curve_plot(
     ax.set_ylabel('재현율 (True Positive Rate)', fontsize=8)
     ax.set_title('ROC 곡선', fontsize=10, fontweight='bold')
     ax.legend(loc="lower right", fontsize=7)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -2121,6 +2238,7 @@ def roc_curve_plot(
 # ===================================================================
 def confusion_matrix_plot(
     fit,
+    title: str | None = None,
     threshold: float = 0.5,
     width: int = config.width,
     height: int = config.height,
@@ -2133,6 +2251,7 @@ def confusion_matrix_plot(
     Args:
         fit: statsmodels Logit 결과 객체 (`fit.predict()`로 예측 확률을 계산 가능해야 함).
+        title (str|None): 그래프 제목.
         threshold (float): 예측 확률을 이진 분류로 변환할 임계값. 기본값 0.5.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
@@ -2163,7 +2282,7 @@ def confusion_matrix_plot(
     ax.set_title(f'혼동행렬 (임계값: {threshold})', fontsize=8, fontweight='bold')
-    finalize_plot(ax, callback, outparams, save_path, False)
+    finalize_plot(ax, callback, outparams, save_path, False, title)
 # ===================================================================
@@ -2173,6 +2292,7 @@ def radarplot(
     df: DataFrame,
     columns: list = None,
     hue: str = None,
+    title: str | None = None,
     normalize: bool = True,
     fill: bool = True,
     fill_alpha: float = 0.25,
@@ -2192,6 +2312,7 @@ def radarplot(
         df (DataFrame): 시각화할 데이터.
         columns (list|None): 레이더 차트에 표시할 컬럼 목록. None이면 모든 숫자형 컬럼 사용.
         hue (str|None): 집단 구분 컬럼. None이면 각 행을 개별 객체로 표시.
+        title (str|None): 그래프 제목.
         normalize (bool): 0-1 범위로 정규화 여부. 기본값 True.
         fill (bool): 영역 채우기 여부.
         fill_alpha (float): 채움 투명도.
@@ -2293,7 +2414,7 @@ def radarplot(
     else:
         ax.set_title('Radar Chart', pad=20)
-    finalize_plot(ax, callback, outparams, save_path)
+    finalize_plot(ax, callback, outparams, save_path, True, title)
 # ===================================================================
@@ -2302,6 +2423,7 @@ def radarplot(
 def distribution_plot(
     data: DataFrame,
     column: str,
+    title: str | None = None,
     clevel: float = 0.95,
     orient: str = "h",
     hue: str | None = None,
@@ -2322,6 +2444,7 @@ def distribution_plot(
     Args:
         data (DataFrame): 시각화할 데이터.
         column (str): 분석할 컬럼명.
+        title (str|None): 그래프 제목.
         clevel (float): KDE 신뢰수준 (0~1). 기본값 0.95.
         orient (str): Boxplot 방향 ('v' 또는 'h'). 기본값 'h'.
         hue (str|None): 명목형 컬럼명. 지정하면 각 범주별로 행을 늘려 KDE와 boxplot을 그림.

hossam/hs_prep.py CHANGED Viewed

@@ -158,16 +158,23 @@ def minmax_scaler(
 # ===================================================================
 # 지정된 컬럼들을 범주형 데이터로 설정한다
 # ===================================================================
-def set_category(data: DataFrame, *args: str) -> DataFrame:
+def set_category(data: DataFrame, *args: str, columns: list = None) -> DataFrame:
     """카테고리 데이터를 설정한다.
     Args:
         data (DataFrame): 데이터프레임 객체
         *args (str): 컬럼명 목록
+        columns (list, optional): 변환할 컬럼명 목록. args와 중복 사용 불가.
     Returns:
         DataFrame: 카테고리 설정된 데이터프레임
     """
+    # columns 인자가 있으면 args보다 우선한다.
+    if columns is not None:
+        if args:
+            raise ValueError("args와 columns 인자는 중복 사용할 수 없습니다.")
+        args = columns
     df = data.copy()
     for k in args:
@@ -219,7 +226,7 @@ def unmelt(
 # ===================================================================
 # 지정된 변수의 이상치 테이블로 반환한다
 # ===================================================================
-def outlier_table(data: DataFrame, *fields: str) -> DataFrame:
+def outlier_table(data: DataFrame, *fields: str, columns: list = None) -> DataFrame:
     """수치형 컬럼에 대한 사분위수 및 IQR 기반 이상치 경계를 계산한다.
     전달된 `fields`가 없으면 데이터프레임의 모든 수치형 컬럼을 대상으로 한다.
@@ -228,6 +235,7 @@ def outlier_table(data: DataFrame, *fields: str) -> DataFrame:
     Args:
         data (DataFrame): 분석할 데이터프레임.
         *fields (str): 대상 컬럼명(들). 생략 시 모든 수치형 컬럼 대상.
+        columns (list, optional): 변환할 컬럼명 목록. args와 중복 사용 불가.
     Returns:
         DataFrame: Q1, Q2(중앙값), Q3, IQR, 하한, 상한을 포함한 통계표.
@@ -236,6 +244,11 @@ def outlier_table(data: DataFrame, *fields: str) -> DataFrame:
         from hossam import *
         hs_prep.outlier_table(df, "value")
     """
+    # columns 인자가 있으면 args보다 우선한다.
+    if columns is not None:
+        if args:
+            raise ValueError("args와 columns 인자는 중복 사용할 수 없습니다.")
+        args = columns
     target_fields = list(fields) if fields else list(data.select_dtypes(include=[np.number]).columns)
     result = []
@@ -273,7 +286,7 @@ def outlier_table(data: DataFrame, *fields: str) -> DataFrame:
 # ===================================================================
 # 이상치를 대체값(NaN, 0) 또는 중앙값으로 교체한다
 # ===================================================================
-def replace_outliner(data: DataFrame, method: str = "nan", *fields: str) -> DataFrame:
+def replace_outliner(data: DataFrame, method: str = "nan", *fields: str, columns: list = None) -> DataFrame:
     """이상치 경계값을 넘어가는 데이터를 경계값으로 대체한다.
     Args:
@@ -285,10 +298,16 @@ def replace_outliner(data: DataFrame, method: str = "nan", *fields: str) -> Data
             - most: 최빈값 대체
             - median: 중앙값 대체
         *fields (str): 컬럼명 목록
+        columns (list, optional): 변환할 컬럼명 목록. args와 중복 사용 불가.
     Returns:
         DataFrame: 이상치가 경계값으로 대체된 데이터 프레임
     """
+    # columns 인자가 있으면 args보다 우선한다.
+    if columns is not None:
+        if args:
+            raise ValueError("args와 columns 인자는 중복 사용할 수 없습니다.")
+        args = columns
     # 원본 데이터 프레임 복사
     df = data.copy()
@@ -335,16 +354,22 @@ def replace_outliner(data: DataFrame, method: str = "nan", *fields: str) -> Data
 # ===================================================================
 # 중빈 이상치를 제거한 연처리된 데이터프레임을 반환한다
 # ===================================================================
-def drop_outliner(data: DataFrame, *fields: str) -> DataFrame:
+def drop_outliner(data: DataFrame, *fields: str, columns: list = None) -> DataFrame:
     """이상치를 결측치로 변환한 후 모두 삭제한다.
     Args:
         data (DataFrame): 데이터프레임
         *fields (str): 컬럼명 목록
+        columns (list, optional): Column names given as a list instead of
+            positional *fields. Cannot be combined with *fields.
     Returns:
         DataFrame: 이상치가 삭제된 데이터프레임
+    Raises:
+        ValueError: If both positional field names and `columns` are given.
     """
+    # `columns` is an alternative way to pass the field names; supplying both
+    # forms at once is ambiguous, so it is rejected.
+    if columns is not None:
+        if fields:
+            raise ValueError("args와 columns 인자는 중복 사용할 수 없습니다.")
+        # The varargs parameter of this function is `fields` (not `args`);
+        # rebinding it is what makes `columns` reach replace_outliner below.
+        fields = columns
     df = replace_outliner(data, "nan", *fields)
     return df.dropna()
@@ -353,7 +378,7 @@ def drop_outliner(data: DataFrame, *fields: str) -> DataFrame:
 # ===================================================================
 # 범주 변수를 더미 변수(One-Hot 인코딩)로 변환한다
 # ===================================================================
-def get_dummies(data: DataFrame, *args: str, drop_first=True, dtype="int") -> DataFrame:
+def get_dummies(data: DataFrame, *args: str, columns: list = None, drop_first: bool = True, dtype: str = "int") -> DataFrame:
     """명목형 변수를 더미 변수로 변환한다.
     컬럼명을 지정하면 그 컬럼들만 더미 변수로 변환하고,
@@ -362,6 +387,7 @@ def get_dummies(data: DataFrame, *args: str, drop_first=True, dtype="int") -> Da
     Args:
         data (DataFrame): 데이터프레임
         *args (str): 변환할 컬럼명 목록. 지정하지 않으면 숫자형이 아닌 모든 컬럼 자동 선택.
+        columns (list, optional): 변환할 컬럼명 목록. args와 중복 사용 불가.
         drop_first (bool, optional): 첫 번째 더미 변수 제거 여부. 기본값 True.
         dtype (str, optional): 더미 변수 데이터 타입. 기본값 "int".
@@ -379,6 +405,12 @@ def get_dummies(data: DataFrame, *args: str, drop_first=True, dtype="int") -> Da
         result = hs_prep.get_dummies(df, 'col1', drop_first=False, dtype='bool')
         ```
     """
+    # columns 인자가 있으면 args보다 우선한다.
+    if columns is not None:
+        if args:
+            raise ValueError("args와 columns 인자는 중복 사용할 수 없습니다.")
+        args = columns
     if not args:
         # args가 없으면 숫자 타입이 아닌 모든 컬럼 자동 선택
         cols_to_convert = []

hossam/hs_stats.py CHANGED Viewed

@@ -1,5 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import annotations
+from typing import overload, Tuple, Literal, Union
 # -------------------------------------------------------------
 import numpy as np
@@ -28,6 +30,7 @@ from scipy.stats import (
     wilcoxon,
     pearsonr,
     spearmanr,
+    chi2
 )
 import statsmodels.api as sm
@@ -36,10 +39,71 @@ from statsmodels.stats.outliers_influence import variance_inflation_factor
 from statsmodels.stats.multitest import multipletests
 from statsmodels.stats.stattools import durbin_watson
 from statsmodels.regression.linear_model import RegressionResultsWrapper
+from statsmodels.discrete.discrete_model import BinaryResultsWrapper
 from statsmodels.discrete.discrete_model import BinaryResults
 from pingouin import anova, pairwise_tukey, welch_anova, pairwise_gameshowell
+from .hs_plot import ols_residplot, ols_qqplot
+# ===================================================================
+# MCAR(결측치 무작위성) 검정
+# ===================================================================
+def mcar_test(data: DataFrame, columns: list | str | None = None) -> DataFrame:
+    """Run a chi-square test of whether missing values look completely at random (MCAR).
+
+    Only the selected columns that contain at least one missing value enter the
+    test. The mean vector and covariance matrix are estimated from the complete
+    rows; each incomplete row then adds the quadratic form of its observed
+    entries (deviation from the mean, weighted by the pseudo-inverse of the
+    matching covariance sub-matrix) to the statistic.
+
+    NOTE(review): Little's MCAR test is usually stated per missing-data pattern
+    with ML (EM) estimates; this routine accumulates row by row with
+    complete-case estimates, so the statistic and degrees of freedom may differ
+    from the textbook version - TODO confirm this is intended.
+
+    Args:
+        data (DataFrame): Data to inspect.
+        columns (list | str | None): Columns to consider. A string is split on
+            commas and each name is stripped. None selects every column.
+
+    Returns:
+        DataFrame: One row with `statistic`, `dof`, `p-value`, `is_mcar`
+        (True when the p-value exceeds 0.05) and `interpretation`.
+
+    Raises:
+        ValueError: If fewer than two selected columns contain missing values,
+            or if there are fewer than p + 1 complete rows, where p is the
+            number of columns with missing values.
+    """
+    if isinstance(columns, str):
+        columns = [c.strip() for c in columns.split(",")]
+    cols = data.columns if columns is None else columns
+    df = data[cols]
+    # Keep only the columns that actually contain missing values.
+    cols_with_na = [c for c in df.columns if df[c].isna().any()]
+    if len(cols_with_na) < 2:
+        raise ValueError("MCAR 검정은 결측치가 있는 변수가 최소 2개 이상 필요합니다.")
+    X = df[cols_with_na].to_numpy()
+    p = X.shape[1]
+    # Estimate the mean and covariance from the complete cases only.
+    complete = ~np.isnan(X).any(axis=1)
+    if complete.sum() < p + 1:
+        raise ValueError("완전관측치(complete cases)가 부족하여 MCAR 검정을 수행할 수 없습니다.")
+    mu = X[complete].mean(axis=0)
+    S = np.cov(X[complete], rowvar=False)
+    chi_sq = 0.0
+    dfree = 0
+    for row in X:
+        obs = ~np.isnan(row)
+        if obs.sum() == p:
+            continue  # complete cases are excluded from the statistic
+        diff = row[obs] - mu[obs]
+        # Pseudo-inverse of the covariance restricted to the observed columns.
+        S_obs_inv = np.linalg.pinv(S[np.ix_(obs, obs)])
+        chi_sq += diff @ S_obs_inv @ diff
+        dfree += obs.sum()
+    dfree -= p  # Little's adjustment
+    # sf(x) is 1 - cdf(x) but stays accurate for very small p-values.
+    p_value = chi2.sf(chi_sq, dfree)
+    is_mcar = p_value > 0.05
+    return DataFrame([{
+        "statistic": chi_sq,
+        "dof": dfree,
+        "p-value": p_value,
+        "is_mcar": is_mcar,
+        "interpretation": (
+            "결측치는 완전 무작위(MCAR)로 판단됨 → 결측치 삭제 가능"
+            if is_mcar else
+            "결측치는 완전 무작위(MCAR)가 아님 → 삭제 시 편향 가능"
+        )
+    }])
 # ===================================================================
 # 결측치 분석 (Missing Values Analysis)
 # ===================================================================
@@ -219,6 +283,8 @@ def describe(data: DataFrame, *fields: str, columns: list | None = None):
             행은 다음과 같은 통계량을 포함:
             - count (float): 비결측치의 수
+            - na_count (int): 결측치의 수
+            - na_rate (float): 결측치 비율(%)
             - mean (float): 평균값
             - std (float): 표준편차
             - min (float): 최소값
@@ -267,9 +333,13 @@ def describe(data: DataFrame, *fields: str, columns: list | None = None):
     # 기술통계량 구하기
     desc = data[list(fields)].describe().T
-    # 각 컬럼별 결측치 수(null_count) 추가
-    null_counts = data[list(fields)].isnull().sum()
-    desc.insert(1, 'null_count', null_counts)
+    # 각 컬럼별 결측치 수(na_count) 추가
+    na_counts = data[list(fields)].isnull().sum()
+    desc.insert(1, 'na_count', na_counts)
+    # 결측치 비율(na_rate) 추가
+    desc.insert(2, 'na_rate', (na_counts / len(data)) * 100)
     # 추가 통계량 계산
     additional_stats = []
@@ -1192,7 +1262,10 @@ def trend(x: any, y: any, degree: int = 1, value_count: int = 100) -> Tuple[np.n
 # ===================================================================
 # 선형회귀 요약 리포트
 # ===================================================================
-def ols_report(fit, data, full=False, alpha=0.05):
+def ols_report(fit, data, full=False, alpha=0.05) -> Union[
+    Tuple[DataFrame, DataFrame],
+    Tuple[DataFrame, DataFrame, str, str, list[str], str]
+]:
     """선형회귀 적합 결과를 요약 리포트로 변환한다.
     Args:
@@ -1211,6 +1284,7 @@ def ols_report(fit, data, full=False, alpha=0.05):
             - 회귀식 문자열 (`equation_text`, str): 상수항과 계수를 포함한 회귀식 표현.
         full=False일 때:
+            - 성능 지표 표 (`pdf`, DataFrame): R, R², Adj. R², F, p-value, Durbin-Watson.
             - 회귀계수 표 (`rdf`, DataFrame)
     Examples:
@@ -1378,7 +1452,19 @@ def ols_report(fit, data, full=False, alpha=0.05):
 # ===================================================================
 # 선형회귀
 # ===================================================================
-def ols(df: DataFrame, yname: str, report=False):
+def ols(df: DataFrame, yname: str, report: bool | str | int = False) -> Union[
+    RegressionResultsWrapper,
+    Tuple[RegressionResultsWrapper, DataFrame, DataFrame],
+    Tuple[
+        RegressionResultsWrapper,
+        DataFrame,
+        DataFrame,
+        str,
+        str,
+        list[str],
+        str
+    ]
+]:
     """선형회귀분석을 수행하고 적합 결과를 반환한다.
     OLS(Ordinary Least Squares) 선형회귀분석을 실시한다.
@@ -1387,7 +1473,7 @@ def ols(df: DataFrame, yname: str, report=False):
     Args:
         df (DataFrame): 종속변수와 독립변수를 모두 포함한 데이터프레임.
         yname (str): 종속변수 컬럼명.
-        report: 리포트 모드 설정. 다음 값 중 하나:
+        report (bool | str | int): 리포트 모드 설정. 다음 값 중 하나:
             - False (기본값): 리포트 미사용. fit 객체만 반환.
             - 1 또는 'summary': 요약 리포트 반환 (full=False).
             - 2 또는 'full': 풀 리포트 반환 (full=True).
@@ -1426,10 +1512,10 @@ def ols(df: DataFrame, yname: str, report=False):
         fit = hs_stats.ols(df, 'target')
         # 요약 리포트 반환
-        fit, pdf, rdf = hs_stats.ols(df, 'target', report=1)
+        fit, pdf, rdf = hs_stats.ols(df, 'target', report='summary')
         # 풀 리포트 반환
-        fit, pdf, rdf, result_report, model_report, var_reports, eq = hs_stats.ols(df, 'target', report=2)
+        fit, pdf, rdf, result_report, model_report, var_reports, eq = hs_stats.ols(df, 'target', report='full')
         ```
     """
     x = df.drop(yname, axis=1)
@@ -1459,15 +1545,31 @@ def ols(df: DataFrame, yname: str, report=False):
 # ===================================================================
 # 로지스틱 회귀 요약 리포트
 # ===================================================================
-def logit_report(fit, data, threshold=0.5, full=False, alpha=0.05):
+def logit_report(
+    fit: BinaryResultsWrapper,
+    data: DataFrame,
+    threshold: float = 0.5,
+    full: Union[bool, str, int] = False,
+    alpha: float = 0.05
+) -> Union[
+    Tuple[DataFrame, DataFrame],
+    Tuple[
+        DataFrame,
+        DataFrame,
+        str,
+        str,
+        list[str],
+        np.ndarray
+    ]
+]:
     """로지스틱 회귀 적합 결과를 상세 리포트로 변환한다.
     Args:
         fit: statsmodels Logit 결과 객체 (`fit.summary()`와 예측 확률을 지원해야 함).
-        data: 종속변수와 독립변수를 모두 포함한 DataFrame.
-        threshold: 예측 확률을 이진 분류로 변환할 임계값. 기본값 0.5.
-        full: True이면 6개 값 반환, False이면 주요 2개(cdf, rdf)만 반환. 기본값 False.
-        alpha: 유의수준. 기본값 0.05.
+        data (DataFrame): 종속변수와 독립변수를 모두 포함한 DataFrame.
+        threshold (float): 예측 확률을 이진 분류로 변환할 임계값. 기본값 0.5.
+        full (bool | str | int): True이면 6개 값 반환, False이면 주요 2개(cdf, rdf)만 반환. 기본값 False.
+        alpha (float): 유의수준. 기본값 0.05.
     Returns:
         tuple: full=True일 때 다음 요소를 포함한다.
@@ -1652,7 +1754,25 @@ def logit_report(fit, data, threshold=0.5, full=False, alpha=0.05):
 # ===================================================================
 # 로지스틱 회귀
 # ===================================================================
-def logit(df: DataFrame, yname: str, report=False):
+def logit(
+    df: DataFrame,
+    yname: str,
+    report: Union[bool, str, int] = False
+) -> Union[
+    BinaryResultsWrapper,
+    Tuple[
+        BinaryResultsWrapper,
+        DataFrame
+    ],
+    Tuple[
+        BinaryResultsWrapper,
+        DataFrame,
+        DataFrame,
+        str,
+        str,
+        List[str]
+    ]
+]:
     """로지스틱 회귀분석을 수행하고 적합 결과를 반환한다.
     종속변수가 이항(binary) 형태일 때 로지스틱 회귀분석을 실시한다.
@@ -1734,7 +1854,7 @@ def logit(df: DataFrame, yname: str, report=False):
 # ===================================================================
 # 선형성 검정 (Linearity Test)
 # ===================================================================
-def ols_linearity_test(fit, power: int = 2, alpha: float = 0.05) -> DataFrame:
+def ols_linearity_test(fit, power: int = 2, alpha: float = 0.05, plot: bool = False, title: str = None, save_path: str = None) -> DataFrame:
     """회귀모형의 선형성을 Ramsey RESET 검정으로 평가한다.
     적합된 회귀모형에 대해 Ramsey RESET(Regression Specification Error Test) 검정을 수행하여
@@ -1747,6 +1867,9 @@ def ols_linearity_test(fit, power: int = 2, alpha: float = 0.05) -> DataFrame:
                                power=2일 때 예측값의 제곱항이 추가됨.
                                power가 클수록 더 높은 차수의 비선형성을 감지.
         alpha (float, optional): 유의수준. 기본값 0.05.
+        plot (bool, optional): True이면 잔차 플롯을 출력. 기본값 False.
+        title (str, optional): 플롯 제목. 기본값 None.
+        save_path (str, optional): 플롯을 저장할 경로. 기본값 None
     Returns:
         DataFrame: 선형성 검정 결과를 포함한 데이터프레임.
@@ -1829,13 +1952,16 @@ def ols_linearity_test(fit, power: int = 2, alpha: float = 0.05) -> DataFrame:
         "해석": [interpretation]
     })
+    if plot:
+        ols_residplot(fit, lowess=True, mse=True, title=title, save_path=save_path)
     return result_df
 # ===================================================================
 # 정규성 검정 (Normality Test)
 # ===================================================================
-def ols_normality_test(fit, alpha: float = 0.05) -> DataFrame:
+def ols_normality_test(fit, alpha: float = 0.05, plot: bool = False, title: str = None, save_path: str = None) -> DataFrame:
     """회귀모형 잔차의 정규성을 검정한다.
     회귀모형의 잔차가 정규분포를 따르는지 Shapiro-Wilk 검정과 Jarque-Bera 검정으로 평가한다.
@@ -1844,6 +1970,9 @@ def ols_normality_test(fit, alpha: float = 0.05) -> DataFrame:
     Args:
         fit: 회귀 모형 객체 (statsmodels의 RegressionResultsWrapper).
         alpha (float, optional): 유의수준. 기본값 0.05.
+        plot (bool, optional): True이면 Q-Q 플롯을 출력. 기본값 False.
+        title (str, optional): 플롯 제목. 기본값 None.
+        save_path (str, optional): 플롯을 저장할 경로. 기본값 None
     Returns:
         DataFrame: 정규성 검정 결과를 포함한 데이터프레임.
@@ -1922,6 +2051,10 @@ def ols_normality_test(fit, alpha: float = 0.05) -> DataFrame:
     if not results:
         raise ValueError("정규성 검정을 수행할 수 없습니다.")
+    if plot:
+        ols_qqplot(fit, title=title, save_path=save_path)
     result_df = DataFrame(results)
     return result_df

{hossam-0.4.2.dist-info → hossam-0.4.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hossam
-Version: 0.4.2
+Version: 0.4.4
 Summary: Hossam Data Helper
 Author-email: Lee Kwang-Ho <leekh4232@gmail.com>
 License-Expression: MIT

{hossam-0.4.2.dist-info → hossam-0.4.4.dist-info}/RECORD RENAMED Viewed

@@ -3,14 +3,14 @@ hossam/__init__.py,sha256=OkMeP15jt6aCy7QNXMtkO0YRVvgOQYumkb7GuVKrbcs,2712
 hossam/data_loader.py,sha256=oUIsqbHQoRiHA_1tdElDaYo1ipmUB5fYSXYMB5gLOl0,6395
 hossam/hs_classroom.py,sha256=rgayol3U5PSo4rLfdbClfiAtG21bFrASaSW56PUsjus,27144
 hossam/hs_gis.py,sha256=DLogaf5nxJBbG-d8QoH2g8UfZ1omMtmEXDYgNg8jtT0,11410
-hossam/hs_plot.py,sha256=-ByDla2La34l6zI_T_e-FHAT3d8hLVo5J-pLBP8PixE,78285
-hossam/hs_prep.py,sha256=2ptFFxV4G1IFmy-B89TqXaPkA8jROZutr2XIkaXNHW4,36006
-hossam/hs_stats.py,sha256=qAor-RE5qNsytoZW1mriK3yql9PVif5bBGyG64YC2PM,110780
+hossam/hs_plot.py,sha256=tsJMi2q9SzHRSs25dXsHkkImW-Jk7su1M6TbKwX9koU,83887
+hossam/hs_prep.py,sha256=ocZNGzHzqgasVNLcb_LClTZaAeTYiIg4mzrixeEzBQU,37693
+hossam/hs_stats.py,sha256=LpUG8U9ybnh6qSMW2SKCSDJZTeMhLH2xH2Pj4i7U6TU,114889
 hossam/hs_timeserise.py,sha256=gSj3cPgOGLOZEXhfW1anXbwpoJja847ZY9F8l9piJPE,42601
 hossam/hs_util.py,sha256=8byLj_VR93vS__lyf0xgQKArgMy9qFm2VvZVSCxfQX0,8444
 hossam/leekh.png,sha256=1PB5NQ24SDoHA5KMiBBsWpSa3iniFcwFTuGwuOsTHfI,6395
-hossam-0.4.2.dist-info/licenses/LICENSE,sha256=nIqzhlcFY_2D6QtFsYjwU7BWkafo-rUJOQpDZ-DsauI,941
-hossam-0.4.2.dist-info/METADATA,sha256=IKZmX6E8biC7B8I7HdGsCopOizJu_SgYmOiDTSXcKX4,3676
-hossam-0.4.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-hossam-0.4.2.dist-info/top_level.txt,sha256=_-7bwjhthHplWhywEaHIJX2yL11CQCaLjCNSBlk6wiQ,7
-hossam-0.4.2.dist-info/RECORD,,
+hossam-0.4.4.dist-info/licenses/LICENSE,sha256=nIqzhlcFY_2D6QtFsYjwU7BWkafo-rUJOQpDZ-DsauI,941
+hossam-0.4.4.dist-info/METADATA,sha256=R6qOrcnZhbTzUrRK2x9vNksDjw8rVK1DVZrbRIPSPQQ,3676
+hossam-0.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hossam-0.4.4.dist-info/top_level.txt,sha256=_-7bwjhthHplWhywEaHIJX2yL11CQCaLjCNSBlk6wiQ,7
+hossam-0.4.4.dist-info/RECORD,,

{hossam-0.4.2.dist-info → hossam-0.4.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{hossam-0.4.2.dist-info → hossam-0.4.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{hossam-0.4.2.dist-info → hossam-0.4.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

hossam 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl

hossam 0.4.2py3-none-any.whl → 0.4.4py3-none-any.whl