PyPI - hossam - Versions diffs - 0.4.8__py3-none-any.whl → 0.4.11__py3-none-any.whl - Mend

hossam 0.4.8__py3-none-any.whl → 0.4.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

hossam/__init__.py +3 -1
hossam/hs_classroom.py +1 -1
hossam/hs_cluster.py +119 -0
hossam/hs_plot.py +288 -100
hossam/hs_timeserise.py +1 -1
hossam/hs_util.py +44 -0
{hossam-0.4.8.dist-info → hossam-0.4.11.dist-info}/METADATA +5 -3
hossam-0.4.11.dist-info/RECORD +16 -0
{hossam-0.4.8.dist-info → hossam-0.4.11.dist-info}/WHEEL +1 -1
hossam-0.4.8.dist-info/RECORD +0 -15
{hossam-0.4.8.dist-info → hossam-0.4.11.dist-info}/licenses/LICENSE +0 -0
{hossam-0.4.8.dist-info → hossam-0.4.11.dist-info}/top_level.txt +0 -0

hossam/__init__.py CHANGED Viewed

@@ -6,8 +6,10 @@ from . import hs_prep
 from . import hs_stats
 from . import hs_timeserise
 from . import hs_util
+from . import hs_cluster
 from .hs_util import load_info
 from .hs_util import _load_data_remote as load_data
+from .hs_plot import visualize_silhouette
 # py-modules
 import sys
@@ -24,7 +26,7 @@ except Exception:
 my_dpi = hs_plot.config.dpi
-__all__ = ["my_dpi", "load_data", "load_info", "hs_classroom", "hs_gis", "hs_plot", "hs_prep", "hs_stats", "hs_timeserise", "hs_util"]
+__all__ = ["my_dpi", "load_data", "load_info", "hs_classroom", "hs_gis", "hs_plot", "hs_prep", "hs_stats", "hs_timeserise", "hs_util", "hs_cluster", "visualize_silhouette"]
 # 내부 모듈에서 hs_fig를 사용할 때는 아래와 같이 import 하세요.
 # from hossam import hs_fig

hossam/hs_classroom.py CHANGED Viewed

@@ -8,8 +8,8 @@ from kmodes.kmodes import KModes
 from matplotlib import pyplot as plt
 import seaborn as sns
 from .hs_util import load_data, pretty_table
-from . import hs_plot
 from .hs_plot import config
+from . import hs_plot
 # ===================================================================
 # 학생들을 관심사와 성적으로 균형잡힌 조로 편성한다

hossam/hs_cluster.py ADDED Viewed

@@ -0,0 +1,119 @@
+from typing import Literal
+from kneed import KneeLocator
+from pandas import Series
+from matplotlib.pyplot import Axes # type: ignore
+from . import hs_plot
+import numpy as np
+def elbow_point(
+        x: Series | np.ndarray | list,
+        y: Series | np.ndarray | list,
+        dir: Literal["left,down", "left,up", "right,down", "right,up"] = "left,down",
+        S: float = 0.1,
+        plot: bool = True,
+        title: str | None = None,
+        marker: str | None = None,
+        width: int = hs_plot.config.width,
+        height: int = hs_plot.config.height,
+        dpi: int = hs_plot.config.dpi,
+        linewidth: int = hs_plot.config.line_width,
+        save_path: str | None = None,
+        ax: Axes | None = None,
+        **params,
+) -> tuple:
+    """Locate the elbow (knee) point of a curve and optionally plot it.
+    The x/y values are handed to kneed's ``KneeLocator``; the detected elbow
+    is returned and, when ``plot`` is true, drawn on a line plot.
+    Args:
+        x: x-axis values (for example candidate K values).
+        y: y-axis values (for example inertia / SSE).
+        dir: Shape of the curve, translated into KneeLocator settings:
+            - "left,down"  -> curve="convex",  direction="decreasing"
+            - "left,up"    -> curve="concave", direction="increasing"
+            - "right,down" -> curve="convex",  direction="increasing"
+            - "right,up" (and any other value) -> curve="concave",
+              direction="decreasing"
+        S: Sensitivity passed to KneeLocator. Defaults to 0.1.
+        plot: Draw the curve through ``hs_plot.lineplot`` when true.
+        title: Plot title.
+        marker: Marker style.
+        width: Plot width forwarded to ``hs_plot.lineplot``.
+        height: Plot height forwarded to ``hs_plot.lineplot``.
+        dpi: Plot resolution forwarded to ``hs_plot.lineplot``.
+        linewidth: Line width forwarded to ``hs_plot.lineplot``.
+        save_path: Output path forwarded to ``hs_plot.lineplot``.
+        ax: Existing matplotlib Axes forwarded to ``hs_plot.lineplot``.
+        **params: Extra keyword arguments forwarded to ``hs_plot.lineplot``.
+    Returns:
+        tuple: ``(best_x, best_y)`` taken from ``KneeLocator.elbow`` and
+        ``KneeLocator.elbow_y``. Both are ``None`` when no elbow is found.
+    Examples:
+        ```python
+        x = [1, 2, 3, 4, 5, 6]
+        y = [100, 80, 60, 45, 44, 43]
+        elbow_point(x, y)
+        ```
+    """
+    # Unrecognised `dir` values fall back to the "right,up" settings.
+    curve, direction = {
+        "left,down": ("convex", "decreasing"),
+        "left,up": ("concave", "increasing"),
+        "right,down": ("convex", "increasing"),
+    }.get(dir, ("concave", "decreasing"))
+    kn = KneeLocator(x=x, y=y, curve=curve, direction=direction, S=S)
+    best_x = kn.elbow
+    best_y = kn.elbow_y
+    if plot:
+        def hvline(ax):
+            # Without a detected elbow there is nothing to annotate; drawing
+            # the guides with None would raise, so keep the bare curve.
+            if best_x is None or best_y is None:
+                return
+            ax.axvline(best_x, color="red", linestyle="--", linewidth=0.7)
+            ax.axhline(best_y, color="red", linestyle="--", linewidth=0.7)
+            ax.text(
+                best_x + 0.1,
+                best_y + 0.1,
+                "Best K=%d" % best_x,
+                fontsize=8,
+                ha="left",
+                va="bottom",
+                color="r",
+            )
+        # x and y are raw sequences, so no DataFrame is supplied.
+        hs_plot.lineplot(
+            df=None,
+            xname=x,
+            yname=y,
+            title=title,
+            marker=marker,
+            width=width,
+            height=height,
+            linewidth=linewidth,
+            dpi=dpi,
+            save_path=save_path,
+            callback=hvline,
+            ax=ax,
+            **params
+        )
+    return best_x, best_y

hossam/hs_plot.py CHANGED Viewed

@@ -5,10 +5,10 @@ from typing import Callable
 # ===================================================================
 import numpy as np
-import pandas as pd
 import seaborn as sb
 import matplotlib.pyplot as plt
 from matplotlib.pyplot import Axes # type: ignore
+from pandas import Series, DataFrame
 from math import sqrt
 from pandas import DataFrame
@@ -22,12 +22,16 @@ from statsmodels.nonparametric.smoothers_lowess import lowess as sm_lowess
 from statannotations.Annotator import Annotator
 # ===================================================================
+from sklearn.cluster._kmeans import KMeans
 from sklearn.metrics import (
     mean_squared_error,
     ConfusionMatrixDisplay,
     roc_curve,
     auc,
-    confusion_matrix
+    confusion_matrix,
+    silhouette_score,
+    silhouette_samples
 )
 # ===================================================================
@@ -196,9 +200,9 @@ def show_figure(ax: Axes | np.ndarray, callback: Callable | None = None, outpara
 # 선 그래프를 그린다
 # ===================================================================
 def lineplot(
-    df: DataFrame,
-    xname: str | None = None,
-    yname: str | None = None,
+    df: DataFrame | None = None,
+    xname: str | Series | np.ndarray | list | None = None,
+    yname: str | Series | np.ndarray | list | None = None,
     hue: str | None = None,
     title: str | None = None,
     marker: str | None = None,
@@ -215,13 +219,13 @@ def lineplot(
     """선 그래프를 그린다.
     Args:
-        df (DataFrame): 시각화할 데이터.
-        xname (str|None): x축 컬럼명.
-        yname (str|None): y축 컬럼명.
-        hue (str|None): 범주 구분 컬럼명.
-        title (str|None): 그래프 제목.
-        marker (str|None): 마커 모양.
-        palette (str|None): 팔레트 이름.
+        df (DataFrame | None): 시각화할 데이터.
+        xname (str | Series | np.ndarray | list | None): x축 컬럼명 혹은 x축 값 시퀀스.
+        yname (str | Series | np.ndarray | list | None): y축 컬럼명 혹은 y축 값 시퀀스.
+        hue (str | None): 범주 구분 컬럼명.
+        title (str | None): 그래프 제목.
+        marker (str | None): 마커 모양.
+        palette (str | None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
         height (int): 캔버스 세로 픽셀.
         linewidth (float): 선 굵기.
@@ -708,6 +712,8 @@ def scatterplot(
     xname: str,
     yname: str,
     hue=None,
+    vector: str | None = None,
+    outline: bool = False,
     title: str | None = None,
     palette: str | None = None,
     width: int = config.width,
@@ -726,6 +732,8 @@ def scatterplot(
         xname (str): x축 컬럼.
         yname (str): y축 컬럼.
         hue (str|None): 범주 컬럼.
+        vector (str|None): 벡터 종류 컬럼.
+        outline (bool): 점 외곽선 표시 여부.
         title (str|None): 그래프 제목.
         palette (str|None): 팔레트 이름.
         width (int): 캔버스 가로 픽셀.
@@ -745,9 +753,32 @@ def scatterplot(
         fig, ax = get_default_ax(width, height, 1, 1, dpi) # type: ignore
         outparams = True
+    if outline and hue is not None:
+        # 군집별 값의 종류별로 반복 수행
+        for c in df[hue].unique():
+            if c == -1:
+                continue
+            # 한 종류만 필터링한 결과에서 두 변수만 선택
+            df_c = df.loc[df[hue] == c, [xname, yname]]
+            try:
+                # 외곽선 좌표 계산
+                hull = ConvexHull(df_c)
+                # 마지막 좌표 이후에 첫 번째 좌표를 연결
+                points = np.append(hull.vertices, hull.vertices[0])
+                ax.plot(    # type: ignore
+                    df_c.iloc[points, 0], df_c.iloc[points, 1], linewidth=linewidth, linestyle=":"
+                )
+                ax.fill(df_c.iloc[points, 0], df_c.iloc[points, 1], alpha=0.1)  # type: ignore
+            except:
+                pass
     # hue가 있을 때만 palette 사용, 없으면 color 사용
     scatterplot_kwargs = {
-        "data": df,
         "x": xname,
         "y": yname,
         "hue": hue,
@@ -762,7 +793,30 @@ def scatterplot(
     scatterplot_kwargs.update(params)
-    sb.scatterplot(**scatterplot_kwargs)
+    # 벡터 종류 구분 필드가 전달되지 않은 경우에는 원본 데이터를 그대로 사용
+    if vector is None:
+        sb.scatterplot(data=df, **scatterplot_kwargs)
+    else:
+        # 핵심벡터
+        scatterplot_kwargs['edgecolor'] = '#ffffff'
+        sb.scatterplot(data=df[df[vector] == "core"], **scatterplot_kwargs)
+        # 외곽벡터
+        scatterplot_kwargs['edgecolor'] = '#000000'
+        scatterplot_kwargs['s'] = 25
+        scatterplot_kwargs['marker'] = '^'
+        scatterplot_kwargs['linewidth'] = 0.8
+        sb.scatterplot(data=df[df[vector] == "border"], **scatterplot_kwargs)
+        # 노이즈벡터
+        scatterplot_kwargs['edgecolor'] = None
+        scatterplot_kwargs['s'] = 25
+        scatterplot_kwargs['marker'] = 'x'
+        scatterplot_kwargs['linewidth'] = 2
+        scatterplot_kwargs['color'] = '#ff0000'
+        scatterplot_kwargs['hue'] = None
+        sb.scatterplot(data=df[df[vector] == "noise"], **scatterplot_kwargs)
     finalize_plot(ax, callback, outparams, save_path, True, title) # type: ignore
@@ -1479,79 +1533,6 @@ def heatmap(
     finalize_plot(ax, callback, outparams, save_path, True, title)  # type: ignore
-# ===================================================================
-# 클러스터별 볼록 경계막(convex hull)을 그린다
-# ===================================================================
-def convex_hull(
-    data: DataFrame,
-    xname: str,
-    yname: str,
-    hue: str | None = None,
-    title: str | None = None,
-    palette: str | None = None,
-    width: int = config.width,
-    height: int = config.height,
-    linewidth: float = config.line_width,
-    dpi: int = config.dpi,
-    save_path: str | None = None,
-    callback: Callable | None = None,
-    ax: Axes | None = None,
-    **params,
-):
-    """클러스터별 볼록 껍질(convex hull)과 산점도를 그린다.
-    Args:
-        data (DataFrame): 시각화할 데이터.
-        xname (str): x축 컬럼.
-        yname (str): y축 컬럼.
-        hue (str): 클러스터/범주 컬럼.
-        title (str|None): 그래프 제목.
-        palette (str|None): 팔레트 이름.
-        width (int): 캔버스 가로 픽셀.
-        height (int): 캔버스 세로 픽셀.
-        linewidth (float): 선 굵기.
-        dpi (int): 그림 크기 및 해상도.
-        callback (Callable|None): Axes 후처리 콜백.
-        ax (Axes|None): 외부에서 전달한 Axes.
-        **params: seaborn scatterplot 추가 인자.
-    Returns:
-        None
-    """
-    outparams = False
-    if ax is None:
-        fig, ax = get_default_ax(width, height, 1, 1, dpi)  # type: ignore
-        outparams = True
-    # 군집별 값의 종류별로 반복 수행
-    for c in data[hue].unique():
-        if c == -1:
-            continue
-        # 한 종류만 필터링한 결과에서 두 변수만 선택
-        df_c = data.loc[data[hue] == c, [xname, yname]]
-        try:
-            # 외각선 좌표 계산
-            hull = ConvexHull(df_c)
-            # 마지막 좌표 이후에 첫 번째 좌표를 연결
-            points = np.append(hull.vertices, hull.vertices[0])
-            ax.plot(    # type: ignore
-                df_c.iloc[points, 0], df_c.iloc[points, 1], linewidth=linewidth, linestyle=":"
-            )
-            ax.fill(df_c.iloc[points, 0], df_c.iloc[points, 1], alpha=0.1)  # type: ignore
-        except:
-            pass
-    # convex_hull은 hue가 필수이므로 palette를 그대로 사용
-    sb.scatterplot(
-        data=data, x=xname, y=yname, hue=hue, palette=palette, ax=ax, **params
-    )
-    finalize_plot(ax, callback, outparams, save_path, True, title)  # type: ignore
 # ===================================================================
 # KDE와 신뢰구간을 나타낸 그래프를 그린다
@@ -2045,16 +2026,8 @@ def scatter_by_class(
                 processed.append([item, yname])
         group = processed
-    if outline:
-        for v in group:
-            convex_hull(data=data, xname=v[0], yname=v[1], hue=hue, palette=palette,
-                        width=width, height=height, linewidth=linewidth, dpi=dpi, callback=callback,
-                        save_path=save_path)
-    else:
-        for v in group:
-            scatterplot(data=data, xname=v[0], yname=v[1], hue=hue, palette=palette,
-                        width=width, height=height, linewidth=linewidth, dpi=dpi, callback=callback,
-                        save_path=save_path)    # type: ignore
+    for v in group:
+        scatterplot(data=data, xname=v[0], yname=v[1], outline=outline, hue=hue, palette=palette, width=width, height=height, linewidth=linewidth, dpi=dpi, callback=callback, save_path=save_path)    # type: ignore
 # ===================================================================
@@ -2149,8 +2122,8 @@ def categorical_target_distribution(
 # ===================================================================
 def roc_curve_plot(
     fit,
-    y: np.ndarray | pd.Series | None = None,
-    X: pd.DataFrame | np.ndarray | None = None,
+    y: np.ndarray | Series | None = None,
+    X: DataFrame | np.ndarray | None = None,
     title: str | None = None,
     width: int = config.height,
     height: int = config.height,
@@ -2477,7 +2450,7 @@ def distribution_plot(
             if hue not in data.columns:
                 raise ValueError(f"hue column '{hue}' not found in DataFrame")
-            categories = list(pd.Series(data[hue].dropna().unique()).sort_values())
+            categories = list(Series(data[hue].dropna().unique()).sort_values())
             n_cat = len(categories) if categories else 1
             fig, axes = get_default_ax(width, height, rows=n_cat, cols=2, dpi=dpi, title=title)
@@ -2519,3 +2492,218 @@ def distribution_plot(
                 plt.close()
             else:
                 plt.show()
+def silhouette_plot(
+        estimator: KMeans,
+        data: DataFrame,
+        title: str | None = None,
+        width: int = config.width,
+        height: int = config.height,
+        linewidth: float = config.line_width,
+        dpi: int = config.dpi,
+        save_path: str | None = None,
+        callback: Callable | None = None,
+        ax: Axes | None = None,
+) -> None:
+    """Draw the silhouette plot of a fitted clustering model.
+    Each cluster is drawn as a filled band of its sorted per-sample
+    silhouette coefficients; a red dashed vertical line marks the mean
+    silhouette coefficient over all samples.
+    Args:
+        estimator (KMeans): Fitted model; ``labels_`` and ``n_clusters`` are read.
+        data (DataFrame): Data the model was fitted on.
+        title (str | None): Plot title. When None a title containing the
+            cluster count and the mean silhouette score is generated.
+        width (int): Canvas width forwarded to ``get_default_ax``.
+        height (int): Canvas height forwarded to ``get_default_ax``.
+        linewidth (float): Width of the mean-score line.
+        dpi (int): Resolution forwarded to ``get_default_ax``.
+        save_path (str | None): Output path forwarded to ``finalize_plot``.
+        callback (Callable | None): Hook forwarded to ``finalize_plot``.
+        ax (Axes | None): Existing Axes; a new one is created when None.
+    Returns:
+        None
+    """
+    outparams = False
+    if ax is None:
+        fig, ax = get_default_ax(width, height, 1, 1, dpi) # type: ignore
+        outparams = True
+    labels = estimator.labels_
+    n_clusters = estimator.n_clusters
+    sil_values = silhouette_samples(X=data, labels=labels)
+    # The silhouette score is the mean of the per-sample coefficients, so
+    # derive it here instead of repeating the distance computation.
+    sil_avg = float(sil_values.mean())
+    # Leave a 10-unit gap below the first band and between bands.
+    y_lower = 10
+    for i in range(n_clusters):   # type: ignore
+        cluster_values = np.sort(sil_values[labels == i])
+        cluster_size = cluster_values.shape[0]
+        y_upper = y_lower + cluster_size
+        ax.fill_betweenx(   # type: ignore
+            np.arange(y_lower, y_upper),
+            0,
+            cluster_values,
+            alpha=0.7,
+        )
+        # Cluster index, placed left of zero at the band's vertical middle.
+        ax.text(-0.05, y_lower + 0.5 * cluster_size, str(i))  # type: ignore
+        y_lower = y_upper + 10
+    ax.axvline(x=sil_avg, color="red", linestyle="--", linewidth=linewidth)  # type: ignore
+    ax.set_xlabel("The silhouette coefficient values")  # type: ignore
+    ax.set_ylabel("Cluster label")  # type: ignore
+    ax.set_xlim([-0.1, 1])  # type: ignore
+    ax.set_ylim([0, len(data) + (n_clusters + 1) * 10])  # type: ignore
+    ax.set_yticks([])  # type: ignore
+    ax.set_xticks([0, 0.2, 0.4, 0.6, 0.8, 1]) # type: ignore
+    if title is None:
+        title = "Number of Cluster : " + str(n_clusters) + ", Silhouette Score :" + str(round(sil_avg, 3))    # type: ignore
+    finalize_plot(ax, callback, outparams, save_path, True, title) # type: ignore
+def cluster_plot(
+    estimator: KMeans,
+    data: DataFrame,
+    xname: str | None = None,
+    yname: str | None = None,
+    hue: str | None = None,
+    title: str | None = None,
+    palette: str | None = None,
+    outline: bool = False,
+    width: int = config.width,
+    height: int = config.height,
+    linewidth: float = config.line_width,
+    dpi: int = config.dpi,
+    save_path: str | None = None,
+    ax: Axes | None = None,
+) -> None:
+    """Scatter-plot a clustering result in two dimensions with its centers.
+    Args:
+        estimator (KMeans): Fitted model; ``labels_`` and
+            ``cluster_centers_`` are read.
+        data (DataFrame): Data the model was fitted on. It is copied, so the
+            caller's frame is not modified.
+        xname (str | None): x-axis column. Defaults to the first column.
+        yname (str | None): y-axis column. Defaults to the second column.
+        hue (str | None): Column holding the cluster labels. When empty, a
+            'cluster' column is created from ``estimator.labels_``.
+        title (str | None): Plot title; a default title is used when None.
+        palette (str | None): Palette forwarded to ``scatterplot``.
+        outline (bool): Forwarded to ``scatterplot`` as ``outline``.
+        width (int): Canvas width forwarded to ``get_default_ax`` / ``scatterplot``.
+        height (int): Canvas height forwarded to ``get_default_ax`` / ``scatterplot``.
+        linewidth (float): Line width of the center markers and of the plot.
+        dpi (int): Resolution forwarded to ``get_default_ax`` / ``scatterplot``.
+        save_path (str | None): Output path forwarded to ``scatterplot``.
+        ax (Axes | None): Existing Axes; a new one is created when None.
+    Returns:
+        None
+    Example:
+        ```python
+        cluster_plot(estimator, data, xname='Sepal.Length', yname='Sepal.Width')
+        ```
+    """
+    # NOTE(review): scatterplot always receives a non-None ax from here, so
+    # it presumably treats the axes as caller-owned - confirm that a
+    # standalone call is still shown/saved by finalize_plot.
+    if ax is None:
+        _fig, ax = get_default_ax(width, height, 1, 1, dpi) # type: ignore
+    df = data.copy()
+    if not hue:
+        df['cluster'] = estimator.labels_   # type: ignore
+        hue = 'cluster'
+    if xname is None:
+        xname = df.columns[0]   # type: ignore
+    if yname is None:
+        yname = df.columns[1]   # type: ignore
+    # Column positions are used to pick the matching coordinates of each center.
+    # NOTE(review): assumes the column order matches the features used for
+    # fitting - confirm against callers.
+    xindex = df.columns.get_loc(xname)   # type: ignore
+    yindex = df.columns.get_loc(yname)   # type: ignore
+    def callback(ax: Axes) -> None:
+        # Mark every center with a white disc outlined in red ...
+        centers = estimator.cluster_centers_   # type: ignore
+        ax.scatter(    # type: ignore
+            centers[:, xindex],
+            centers[:, yindex],
+            marker="o",
+            color="white",
+            alpha=1,
+            s=200,
+            edgecolor="r",
+            linewidth=linewidth
+        )
+        # ... and print the cluster index on top of it.
+        for i, c in enumerate(centers):
+            ax.scatter(c[xindex], c[yindex], marker="$%d$" % i, alpha=1, s=50, edgecolor="k")
+        # Column labels need not be strings (e.g. integer labels of a frame
+        # built from an ndarray), so format instead of concatenating.
+        ax.set_xlabel(f"Feature space for the {xname}")
+        ax.set_ylabel(f"Feature space for the {yname}")
+    scatterplot(
+        df=df,
+        xname=xname,
+        yname=yname,
+        hue=hue,
+        title="The visualization of the clustered data." if title is None else title,
+        outline=outline,
+        palette=palette,
+        width=width,
+        height=height,
+        linewidth=linewidth,
+        dpi=dpi,
+        save_path=save_path,
+        callback=callback,
+        ax=ax
+    )
+def visualize_silhouette(estimator: KMeans, data: DataFrame) -> None:
+    """Show the silhouette plot and the cluster scatter plot side by side.
+    Args:
+        estimator (KMeans): Fitted clustering model.
+        data (DataFrame): Data the model was fitted on.
+    Returns:
+        None
+    Note:
+        - The first axes holds the silhouette plot, the second the
+          two-dimensional cluster scatter plot.
+    """
+    # One row, two columns: index 0 for the silhouette, index 1 for the scatter.
+    _figure, panels = get_default_ax(rows=1, cols=2)
+    silhouette_plot(estimator=estimator, data=data, ax=panels[0])
+    cluster_plot(estimator=estimator, data=data, ax=panels[1])
+    finalize_plot(panels)

hossam/hs_timeserise.py CHANGED Viewed

@@ -123,7 +123,7 @@ def diff(
             ardict["기각값(Critical Values) %s" % key] = value
         stationarity = ar[1] <= 0.05
-        ardict["데이터 정상성 여부"] = "정상" if stationarity else "비정상"
+        ardict["데이터 정상성 여부"] = "정상" if stationarity else "비정상" # type: ignore
         ardf = DataFrame(ardict, index=["ADF Test"]).T
         pretty_table(ardf)

hossam/hs_util.py CHANGED Viewed

@@ -2,14 +2,21 @@
 # -------------------------------------------------------------
 import requests
 import json
+import tempfile
+import zipfile
+import shutil
+from pathlib import Path
 from typing import TYPE_CHECKING
 from importlib.metadata import distributions
 import pandas as pd
 import numpy as np
+import glob as gl
+# -------------------------------------------------------------
 from pandas import DataFrame, DatetimeIndex, read_csv, read_excel
 from scipy.stats import normaltest
 from tabulate import tabulate
 from os.path import join, exists
+import os
 from io import BytesIO
 from pandas import DataFrame, read_csv, read_excel
 from typing import Optional, Tuple, Any
@@ -20,6 +27,40 @@ BASE_URL = "https://data.hossam.kr"
 def __get_df(path: str, index_col=None) -> DataFrame:
     p = path.rfind(".")
     exec = path[p+1:].lower()
+    tmp_dir = None
+    # 파일 확장자가 압축파일인 경우 로컬에 파일을 다운로드 후 압축 해제
+    if exec == "zip":
+        tmp_dir = os.getcwd() + "/.hossam_tmp"
+        os.makedirs(tmp_dir, exist_ok=True)
+        zip_path = join(tmp_dir, "data.zip")
+        # 원격 URL인 경우 파일 다운로드
+        if path.lower().startswith(('http://', 'https://')):
+            path = path.replace("\\", "/")
+            with requests.Session() as session:
+                r = session.get(path)
+                if r.status_code != 200:
+                    raise Exception(f"HTTP {r.status_code} Error - {r.reason} > {path}")
+                with open(zip_path, "wb") as f:
+                    f.write(r.content)
+        else:
+            zip_path = Path(path)
+        # 압축 해제
+        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+            zip_ref.extractall(tmp_dir)
+        # 압축 해제된 파일 중 첫 번째 파일을 데이터로 로드
+        extracted_files = list(gl.glob(join(tmp_dir, '*')))
+        if not extracted_files:
+            raise FileNotFoundError("압축 파일 내에 데이터 파일이 없습니다.")
+        path = str(extracted_files[0])
+        p = path.rfind(".")
+        exec = path[p+1:].lower()
     if exec == 'xlsx':
         # If path is a remote URL, fetch the file once and reuse the bytes
@@ -60,6 +101,9 @@ def __get_df(path: str, index_col=None) -> DataFrame:
     else:
         df = read_csv(path, index_col=index_col)
+    if tmp_dir:
+        shutil.rmtree(tmp_dir)
     return df
 # -------------------------------------------------------------

{hossam-0.4.8.dist-info → hossam-0.4.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hossam
-Version: 0.4.8
+Version: 0.4.11
 Summary: Hossam Data Helper
 Author-email: Lee Kwang-Ho <leekh4232@gmail.com>
 License-Expression: MIT
@@ -61,9 +61,9 @@ title: 🎓 Hossam Data Helper
 - 📊 **풍부한 시각화**: 25+ 시각화 함수 (Seaborn/Matplotlib 기반)
 - 🎯 **통계 분석**: 회귀, 분류, 시계열 분석 도구
+- 🤖 **머신 러닝**: 예측, 분류, 군집 학습 모델 구축 및 성능 평가
 - 📦 **샘플 데이터**: 학습용 데이터셋 즉시 로드
 - 🔧 **데이터 전처리**: 결측치 처리, 이상치 탐지, 스케일링
-- 🤖 **MCP 서버**: VSCode/Copilot과 통합 가능한 Model Context Protocol 지원
 - 📈 **교육용 최적화**: 데이터 분석 교육에 특화된 설계
@@ -84,9 +84,11 @@ pip install hossam
 - **hs_plot**: 25+ 시각화 함수 (선 그래프, 산점도, 히스토그램, 박스플롯, 히트맵 등)
 - **hs_stats**: 회귀/분류 분석, 교차검증, 정규성 검정, 상관분석 등
 - **hs_prep**: 결측치 처리, 이상치 탐지, 스케일링, 인코딩 등의 데이터 전처리 기능
-- **hs_timeseries**: 시계열 분석 기능 지원
+- **hs_timeserise**: 시계열 분석 기능 지원
 - **hs_gis**: GIS 데이터 로드 및 시각화 (대한민국 지도 지원)
 - **hs_util**: 예쁜 테이블 출력, 그리드 서치 등
+- **hs_cluster**: 군집분석, PCA 등 (작업중)
+- **hs_ml**: 예측, 분류 분석 (예정)
 자세한 사용법은 [API 문서](https://py.hossam.kr/api/hossam/)를 참고하세요.

hossam-0.4.11.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,16 @@
+hossam/NotoSansKR-Regular.ttf,sha256=0SCufUQwcVWrWTu75j4Lt_V2bgBJIBXl1p8iAJJYkVY,6185516
+hossam/__init__.py,sha256=4cGvavSotmQKhkHS4UCANhzszrMNwXNESAhh0RuFF-w,2893
+hossam/hs_classroom.py,sha256=Sb1thy49LKn2zU90aiOVwHWhyWSMHLZbZX7eXmQlquc,27523
+hossam/hs_cluster.py,sha256=anjoZ12JsIDWoGhm6agd0IF4N_md5czutyW4rbXnDEM,4255
+hossam/hs_gis.py,sha256=DVmndBK-_7GMK3J1_on3ieEQk1S0MfUZ8_wlX-cDdZQ,11581
+hossam/hs_plot.py,sha256=5j498vga_1wZBRlMUZ047LswNSvfTEpGP6uL7yzl3-g,92744
+hossam/hs_prep.py,sha256=ypuX97mCxpo7CLoI_S79bUw7th0ok5LCZjt4vzRaGiI,38326
+hossam/hs_stats.py,sha256=MDS3rvaXDP8aYwcE36JTetWiZgE4fkXnNo0vwlXu-pA,119890
+hossam/hs_timeserise.py,sha256=NzGV4bJmVQr3qUFySOP25qENItmloYjgh3VgwSbSmXc,43163
+hossam/hs_util.py,sha256=ptl-2W7-0Ad_BemZMR8cFnDt6-SHCRRCk1Gh7giFjSs,16149
+hossam/leekh.png,sha256=1PB5NQ24SDoHA5KMiBBsWpSa3iniFcwFTuGwuOsTHfI,6395
+hossam-0.4.11.dist-info/licenses/LICENSE,sha256=nIqzhlcFY_2D6QtFsYjwU7BWkafo-rUJOQpDZ-DsauI,941
+hossam-0.4.11.dist-info/METADATA,sha256=POoVivyFd3rFLoQny6kigfNmsDbHdyvndbZ71Sz2NjY,3803
+hossam-0.4.11.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+hossam-0.4.11.dist-info/top_level.txt,sha256=_-7bwjhthHplWhywEaHIJX2yL11CQCaLjCNSBlk6wiQ,7
+hossam-0.4.11.dist-info/RECORD,,

{hossam-0.4.8.dist-info → hossam-0.4.11.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (80.10.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

hossam-0.4.8.dist-info/RECORD DELETED Viewed

@@ -1,15 +0,0 @@
-hossam/NotoSansKR-Regular.ttf,sha256=0SCufUQwcVWrWTu75j4Lt_V2bgBJIBXl1p8iAJJYkVY,6185516
-hossam/__init__.py,sha256=kiE_u23uXygPmpukEP-n-szHnM2AE5kWHQICByn3qhA,2788
-hossam/hs_classroom.py,sha256=oNRnHPXOu0-YqtPY7EJeS1qteH0CtKxNk5Lt7opti_w,27523
-hossam/hs_gis.py,sha256=DVmndBK-_7GMK3J1_on3ieEQk1S0MfUZ8_wlX-cDdZQ,11581
-hossam/hs_plot.py,sha256=83B7fjEDaXnpwg8GhDGsVX6lAd81rYqoqvMGzovn3qc,85900
-hossam/hs_prep.py,sha256=ypuX97mCxpo7CLoI_S79bUw7th0ok5LCZjt4vzRaGiI,38326
-hossam/hs_stats.py,sha256=MDS3rvaXDP8aYwcE36JTetWiZgE4fkXnNo0vwlXu-pA,119890
-hossam/hs_timeserise.py,sha256=XB8DKJBFb-892ACNCATcyBliSJVtbn-dpzfKi-grRAo,43148
-hossam/hs_util.py,sha256=i5thXDt4VVWbju3y6Q7PAdEay62b-5PJNX9TjQhFZCM,14663
-hossam/leekh.png,sha256=1PB5NQ24SDoHA5KMiBBsWpSa3iniFcwFTuGwuOsTHfI,6395
-hossam-0.4.8.dist-info/licenses/LICENSE,sha256=nIqzhlcFY_2D6QtFsYjwU7BWkafo-rUJOQpDZ-DsauI,941
-hossam-0.4.8.dist-info/METADATA,sha256=G29Fmy2WwAUSrqWEWhzntFhICYB3fXge-s8ZFF52riY,3706
-hossam-0.4.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-hossam-0.4.8.dist-info/top_level.txt,sha256=_-7bwjhthHplWhywEaHIJX2yL11CQCaLjCNSBlk6wiQ,7
-hossam-0.4.8.dist-info/RECORD,,

{hossam-0.4.8.dist-info → hossam-0.4.11.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{hossam-0.4.8.dist-info → hossam-0.4.11.dist-info}/top_level.txt RENAMED Viewed

File without changes

hossam 0.4.8__py3-none-any.whl → 0.4.11__py3-none-any.whl

hossam 0.4.8__py3-none-any.whl → 0.4.11__py3-none-any.whl