PyPI - hossam - Versions diffs - 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl - Mend

hossam 0.3.10py3-none-any.whl → 0.3.12py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

hossam/__init__.py +73 -25
hossam/{classroom.py → hs_classroom.py} +27 -13
hossam/{gis.py → hs_gis.py} +35 -25
hossam/hs_interaction.py +119 -0
hossam/hs_plot.py +2281 -0
hossam/hs_prep.py +597 -0
hossam/hs_stats.py +2704 -0
hossam/hs_timeserise.py +1104 -0
hossam/{util.py → hs_util.py} +35 -25
hossam/leekh.png +0 -0
{hossam-0.3.10.dist-info → hossam-0.3.12.dist-info}/METADATA +49 -46
hossam-0.3.12.dist-info/RECORD +17 -0
hossam/plot.py +0 -1422
hossam/prep.py +0 -394
hossam/stats.py +0 -1037
hossam-0.3.10.dist-info/RECORD +0 -14
{hossam-0.3.10.dist-info → hossam-0.3.12.dist-info}/WHEEL +0 -0
{hossam-0.3.10.dist-info → hossam-0.3.12.dist-info}/licenses/LICENSE +0 -0
{hossam-0.3.10.dist-info → hossam-0.3.12.dist-info}/top_level.txt +0 -0

hossam/__init__.py CHANGED Viewed

@@ -1,37 +1,85 @@
 from .data_loader import load_data, load_info
+from .hs_stats import oneway_anova
 from matplotlib import pyplot as plt
 from matplotlib import font_manager as fm
 from importlib.resources import files, as_file
 from importlib.metadata import version
+from types import SimpleNamespace
 import warnings
 try:
-    __version__ = version('hossam')
+    __version__ = version("hossam")
 except Exception:
-    __version__ = 'develop'
-__all__ = ['load_data', 'load_info']
+    __version__ = "develop"
+# Package-wide figure defaults, exposed as attributes of one namespace object.
+# dpi, font_size and font_weight are applied to matplotlib's rcParams by
+# _init_korean_font() in this module. How the remaining fields are consumed is
+# not visible here -- presumably by the plotting helpers; units of width/height
+# are likewise unconfirmed (TODO confirm).
+hs_fig = SimpleNamespace(
+    dpi=200,
+    width=600,
+    height=320,
+    font_size=6,
+    font_weight="light",
+    frame_width=0.4,
+    line_width=1,
+    grid_alpha=0.3,
+    grid_width=0.4,
+    fill_alpha=0.3
+)
+__all__ = ["load_data", "load_info", "hs_classroom", "hs_gis", "hs_plot", "hs_prep", "hs_stats", "hs_timeserise", "hs_util", "hs_fig"]
-my_dpi = 200  # 이미지 선명도(100~300)
-default_font_size = 6
 def _init_korean_font():
-	"""
-	패키지에 포함된 한글 폰트를 기본 폰트로 설정합니다.
-	"""
-	font_file = 'NotoSansKR-Regular.ttf'
-	try:
-		# 패키지 리소스에서 폰트 파일 경로 확보
-		with as_file(files('hossam') / font_file) as font_path:
-			fm.fontManager.addfont(str(font_path))
-			fprop = fm.FontProperties(fname=str(font_path))
-			fname = fprop.get_name()
-			plt.rcParams['font.family'] = fname
-			plt.rcParams['font.size'] = default_font_size
-			plt.rcParams['axes.unicode_minus'] = False
-			return
-	except Exception as e:
-		warnings.warn(f"한글 폰트 초기화: 패키지 폰트 사용 실패 ({e}).")
-# 모듈 임포트 시점에 폰트 초기화 수행
-_init_korean_font()
+    """Make the Korean font bundled with the package matplotlib's default.
+    Locates ``NotoSansKR-Regular.ttf`` among the ``hossam`` package resources,
+    registers it with matplotlib's font manager and updates ``plt.rcParams``
+    (font family/size/weight, anti-aliasing, figure and savefig DPI, hinting,
+    PDF/PS font type). Prints a confirmation message on success. Any exception
+    raised along the way is reported through ``warnings.warn`` instead of
+    propagating, so a font problem does not abort the import.
+    """
+    font_file = "NotoSansKR-Regular.ttf"
+    try:
+        # Obtain a real filesystem path for the font shipped as package data.
+        with as_file(files("hossam") / font_file) as font_path:
+            fm.fontManager.addfont(str(font_path))
+            fprop = fm.FontProperties(fname=str(font_path))
+            # Family name as reported by the font file itself.
+            fname = fprop.get_name()
+            plt.rcParams.update({
+                "font.family": fname,
+                "font.size": hs_fig.font_size,
+                "font.weight": hs_fig.font_weight,
+                # Keep the minus sign renderable with this font.
+                "axes.unicode_minus": False,
+                "text.antialiased": True,
+                "lines.antialiased": True,
+                "patch.antialiased": True,
+                "figure.dpi": hs_fig.dpi,
+                # Saved images use twice the on-screen DPI.
+                "savefig.dpi": hs_fig.dpi * 2,
+                "text.hinting": "auto",
+                "text.hinting_factor": 8,
+                # 42 selects TrueType embedding for PDF/PS output
+                # (per the matplotlib rcParams documentation).
+                "pdf.fonttype": 42,
+                "ps.fonttype": 42,
+            })
+            print(
+                "\n✅ 시각화를 위한 한글 글꼴(NotoSansKR-Regular)이 자동 적용되었습니다."
+            )
+            return
+    except Exception as e:
+        # Best effort: report the failure but let the package import continue.
+        warnings.warn(f"\n한글 폰트 초기화: 패키지 폰트 사용 실패 ({e}).")
+def _init():
+    """Print the package banner, then set up the bundled Korean font."""
+    # Banner lines, written to stdout one per line in this order.
+    banner = (
+        "📦 아이티윌 이광호 강사가 제작한 라이브러리를 사용중입니다.",
+        "📚 자세한 사용 방법은 https://py.hossam.kr 을 참고하세요.",
+        "📧 Email: leekh4232@gmail.com",
+        "🎬 Youtube: https://www.youtube.com/@hossam-codingclub",
+        "📝 Blog: https://blog.hossam.kr/",
+        f"🔖 Version: {__version__}",
+    )
+    print(*banner, sep="\n")
+    _init_korean_font()
+_init()

hossam/{classroom.py → hs_classroom.py} RENAMED Viewed

@@ -1,21 +1,17 @@
 # -*- coding: utf-8 -*-
-"""
-학생 조 편성 모듈
-학생들을 균형잡힌 조로 나누기 위한 기능을 제공합니다.
-관심사 기반 1차 군집과 점수/인원 균형 조정을 통해
-동질성 있고 균형잡힌 조를 구성합니다.
-"""
+# ===================================================================
+# 패키지 참조
+# ===================================================================
 import math
 from pandas import DataFrame, qcut, concat, to_numeric
 from kmodes.kmodes import KModes
 from matplotlib import pyplot as plt
 import seaborn as sns
-from hossam import my_dpi
-from hossam.util import hs_load_data, hs_pretty_table
+from hossam.hs_util import load_data, pretty_table
+# ===================================================================
+# 학생들을 관심사와 성적으로 균형잡힌 조로 편성한다
+# ===================================================================
 def cluster_students(
     df,
     n_groups: int,
@@ -63,7 +59,7 @@ def cluster_students(
     # 파일 경로인 경우 데이터프레임으로 로드
     if isinstance(df, str):
-        df = hs_load_data(df, info=False)
+        df = load_data(df, info=False)
     # 입력 검증
     if df is None or len(df) == 0:
@@ -201,6 +197,9 @@ def cluster_students(
     return result
+# ===================================================================
+# 조 내 인원과 성적 균형을 반복 조정하여 최적화한다
+# ===================================================================
 def _balance_groups(
     df: DataFrame,
     n_groups: int,
@@ -333,6 +332,9 @@ def _balance_groups(
     return df
+# ===================================================================
+# 성적 데이터가 없을 때 각 조의 인원 수만 균형조정한다
+# ===================================================================
 def _balance_group_sizes_only(
     df: DataFrame,
     n_groups: int,
@@ -371,6 +373,9 @@ def _balance_group_sizes_only(
     return df
+# ===================================================================
+# 조 편성 결과의 인원, 관심사, 점수 분포를 시각화한다
+# ===================================================================
 def report_summary(df: DataFrame, figsize: tuple = (20, 4.2), dpi: int = None) -> None:
     """조 편성 결과의 요약 통계를 시각화합니다.
@@ -493,6 +498,9 @@ def report_summary(df: DataFrame, figsize: tuple = (20, 4.2), dpi: int = None) -
     plt.show()
+# ===================================================================
+# 조별 점수 분포를 커널 밀도 추정(KDE) 그래프로 시각화한다
+# ===================================================================
 def report_kde(df: DataFrame, metric: str = 'average', figsize: tuple = (20, 8), dpi: int = None) -> None:
     """조별 점수 분포를 KDE(Kernel Density Estimation)로 시각화합니다.
@@ -604,6 +612,9 @@ def report_kde(df: DataFrame, metric: str = 'average', figsize: tuple = (20, 8),
     plt.show()
+# ===================================================================
+# 조별로 학생 목록과 평균 점수를 요약하여 데이터프레임으로 반환한다
+# ===================================================================
 def group_summary(df: DataFrame, name_col: str = '학생번호') -> DataFrame:
     """조별로 학생 목록과 평균 점수를 요약합니다.
@@ -667,6 +678,9 @@ def group_summary(df: DataFrame, name_col: str = '학생번호') -> DataFrame:
     return result_df
+# ===================================================================
+# 학생 조 편성부터 시각화까지의 전체 분석 프로세스를 일괄 실행한다
+# ===================================================================
 def analyze_classroom(
     df,
     n_groups: int,
@@ -735,7 +749,7 @@ def analyze_classroom(
     # 2. 조별 요약
     summary = group_summary(df_result, name_col=name_col)
     print("\n✓ 조별 요약:")
-    hs_pretty_table(summary, tablefmt="pretty")
+    pretty_table(summary, tablefmt="pretty")
     print()
     # 3. 요약 시각화

hossam/{gis.py → hs_gis.py} RENAMED Viewed

@@ -1,20 +1,23 @@
 # -*- coding: utf-8 -*-
-# -------------------------------------------------------------
+# ===================================================================
+# 패키지 참조
+# ===================================================================
+import os
+import time
+import warnings
 import requests
 import concurrent.futures as futures
-from pandas import DataFrame
-import pandas as pd
+from pandas import DataFrame, to_numeric
 from tqdm.auto import tqdm
-import time
-from geopandas import GeoDataFrame, read_file
-import geopandas as gpd
+from geopandas import GeoDataFrame, read_file, points_from_xy
 from pyproj import CRS
-import os
-import warnings
-from .util import hs_pretty_table
+from .hs_util import pretty_table
-# -------------------------------------------------------------
+# ===================================================================
+# 단일 주소를 VWorld API로 지오코딩
+# ===================================================================
 def __geocode_item(session: requests.Session, index: int, addr: str, key: str) -> tuple[float, float]:
     """단일 주소를 VWorld API로 지오코딩합니다.
@@ -79,8 +82,11 @@ def __geocode_item(session: requests.Session, index: int, addr: str, key: str) -
     #print("%s --> (%s, %s)" % (addr, latitude, longitude))
     return result
-# -------------------------------------------------------------
-def hs_geocode(df: DataFrame, addr: str, key: str) -> DataFrame:
+# ===================================================================
+# 주소 컬럼을 일괄 지오코딩하여 위도/경도 컬럼을 추가
+# ===================================================================
+def geocode(df: DataFrame, addr: str, key: str) -> DataFrame:
     """주소 컬럼을 일괄 지오코딩하여 위도/경도 컬럼을 추가합니다.
     Args:
@@ -149,8 +155,10 @@ def hs_geocode(df: DataFrame, addr: str, key: str) -> DataFrame:
     return data
-# -------------------------------------------------------------
-def hs_load_shape(path: str, info: bool = True) -> GeoDataFrame:
+# ===================================================================
+# Shapefile을 읽어 `GeoDataFrame`으로 로드
+# ===================================================================
+def load_shape(path: str, info: bool = True) -> GeoDataFrame:
     """Shapefile을 읽어 `GeoDataFrame`으로 로드합니다.
     Args:
@@ -164,7 +172,7 @@ def hs_load_shape(path: str, info: bool = True) -> GeoDataFrame:
         FileNotFoundError: 파일이 존재하지 않는 경우.
     Examples:
-        >>> from hossam.gis import hs_load_shape
+        >>> from hossam.hs_gis import load_shape
         >>> gdf = load_shape("path/to/file.shp", info=False)
     """
     if not os.path.exists(path):
@@ -174,23 +182,25 @@ def hs_load_shape(path: str, info: bool = True) -> GeoDataFrame:
     if info:
         print("\n✅ 테이블 정보")
-        hs_pretty_table(data.info(), tablefmt="pretty")
+        pretty_table(data.info(), tablefmt="pretty")
         print("\n✅ 상위 5개 행")
-        hs_pretty_table(data.head(), tablefmt="pretty")
+        pretty_table(data.head(), tablefmt="pretty")
         print("\n✅ 하위 5개 행")
-        hs_pretty_table(data.tail(), tablefmt="pretty")
+        pretty_table(data.tail(), tablefmt="pretty")
         print("\n📊 기술통계")
         desc = data.describe().T
         desc["nan"] = data.isnull().sum()
-        hs_pretty_table(desc, tablefmt="pretty")
+        pretty_table(desc, tablefmt="pretty")
     return data
-# -------------------------------------------------------------
-def hs_save_shape(
+# ===================================================================
+# 전처리된 데이터(GeoDataFrame 또는 DataFrame)를 Shapefile 또는 GeoPackage로 저장
+# ===================================================================
+def save_shape(
     gdf: GeoDataFrame | DataFrame,
     path: str,
     crs: str | None = None,
@@ -249,8 +259,8 @@ def hs_save_shape(
         df = gdf.copy()
         # 숫자 변환 및 결측 제거
-        df[lat_col] = pd.to_numeric(df[lat_col], errors="coerce")
-        df[lon_col] = pd.to_numeric(df[lon_col], errors="coerce")
+        df[lat_col] = to_numeric(df[lat_col], errors="coerce")
+        df[lon_col] = to_numeric(df[lon_col], errors="coerce")
         df = df.dropna(subset=[lat_col, lon_col])
         if df.empty:
@@ -258,8 +268,8 @@ def hs_save_shape(
                 "⚠️[ValueError] 유효한 위경도 값이 없어 Shapefile을 생성할 수 없습니다."
             )
-        geometry = gpd.points_from_xy(x=df[lon_col], y=df[lat_col])
-        gdf = gpd.GeoDataFrame(df, geometry=geometry, crs=target_crs)
+        geometry = points_from_xy(x=df[lon_col], y=df[lat_col])
+        gdf = GeoDataFrame(df, geometry=geometry, crs=target_crs)
     else:
         # GeoDataFrame의 CRS 처리: 존재하면 유지, 없으면만 설정
         if gdf.crs is None:

hossam/hs_interaction.py ADDED Viewed

@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+# ===================================================================
+# 상호작용(Interaction) 항 생성 함수
+# ===================================================================
+import numpy as np
+from pandas import DataFrame
+from itertools import combinations
+# ===================================================================
+# 변수 간의 상호작용 항을 추가한 데이터프레임을 반환한다
+# ===================================================================
+def interaction(*fields: str):
+    """Build a function that appends pairwise interaction columns.
+    The returned function takes a DataFrame and returns a copy in which, for
+    every selected pair of numeric columns ``a`` and ``b``, a new column named
+    ``"a*b"`` holds their element-wise product. The input DataFrame is not
+    modified.
+    Args:
+        *fields (str): Column names to combine. Every 2-combination of the
+                      names that exist in the DataFrame is considered; names
+                      that are not columns are ignored. When omitted, all
+                      columns selected by ``select_dtypes(include=[np.number])``
+                      are paired.
+    Returns:
+        function: ``wrapper(data: DataFrame) -> DataFrame`` returning the
+                  extended copy. If fewer than two usable columns remain, the
+                  copy is returned without new columns.
+    Examples:
+        >>> from hossam.hs_interaction import interaction
+        >>> import pandas as pd
+        >>> df = pd.DataFrame({'x1': [1, 2, 3], 'x2': [4, 5, 6], 'x3': [7, 8, 9]})
+        # Interaction of specific columns only
+        >>> result = interaction('x1', 'x2')(df)
+        >>> print(result.columns)  # x1, x2, x3, x1*x2
+        # All 2-way interactions
+        >>> result = interaction()(df)
+        >>> print(result.columns)  # x1, x2, x3, x1*x2, x1*x3, x2*x3
+    """
+    # Function-scope import: the module-level imports provide only
+    # ``DataFrame`` from pandas, so no ``pd`` namespace is available here.
+    from pandas.api.types import is_numeric_dtype
+    def wrapper(data: DataFrame) -> DataFrame:
+        df = data.copy()
+        if fields:
+            # Silently drop requested names that are not columns of this frame.
+            candidates = [f for f in fields if f in df.columns]
+        else:
+            candidates = df.select_dtypes(include=[np.number]).columns.tolist()
+        # With fewer than two candidates ``combinations`` yields nothing and
+        # the untouched copy is returned.
+        for col1, col2 in combinations(candidates, 2):
+            # Pairs involving a non-numeric column are skipped, not an error.
+            if not (is_numeric_dtype(df[col1]) and is_numeric_dtype(df[col2])):
+                continue
+            df[f"{col1}*{col2}"] = df[col1] * df[col2]
+        return df
+    return wrapper
+# ===================================================================
+# 직접 상호작용 항을 추가하는 함수 (데코레이터 없이 직접 사용)
+# ===================================================================
+def add_interaction(data: DataFrame, *fields: str) -> DataFrame:
+    """Return a copy of ``data`` with pairwise interaction columns appended.
+    For every selected pair of numeric columns ``a`` and ``b`` a new column
+    named ``"a*b"`` is added holding their element-wise product. The input
+    DataFrame is not modified.
+    Args:
+        data (DataFrame): Source DataFrame.
+        *fields (str): Column names to combine. Every 2-combination of the
+                      names that exist in ``data`` is considered; names that
+                      are not columns are ignored. When omitted, all columns
+                      selected by ``select_dtypes(include=[np.number])`` are
+                      paired.
+    Returns:
+        DataFrame: New DataFrame with the interaction columns added. If fewer
+                   than two usable columns remain, a plain copy is returned.
+    Examples:
+        >>> from hossam.hs_interaction import add_interaction
+        >>> import pandas as pd
+        >>> df = pd.DataFrame({'x1': [1, 2, 3], 'x2': [4, 5, 6], 'x3': [7, 8, 9]})
+        # Interaction of specific columns only
+        >>> result = add_interaction(df, 'x1', 'x2')
+        >>> print(result.columns)  # x1, x2, x3, x1*x2
+        >>> print(result['x1*x2'].tolist())  # [4, 10, 18]
+        # All 2-way interactions
+        >>> result = add_interaction(df)
+        >>> print(result.columns)  # x1, x2, x3, x1*x2, x1*x3, x2*x3
+    """
+    # Function-scope import: the module-level imports provide only
+    # ``DataFrame`` from pandas, so no ``pd`` namespace is available here.
+    from pandas.api.types import is_numeric_dtype
+    df = data.copy()
+    if fields:
+        # Silently drop requested names that are not columns of this frame.
+        candidates = [f for f in fields if f in df.columns]
+    else:
+        candidates = df.select_dtypes(include=[np.number]).columns.tolist()
+    # With fewer than two candidates ``combinations`` yields nothing and the
+    # untouched copy is returned.
+    for col1, col2 in combinations(candidates, 2):
+        # Pairs involving a non-numeric column are skipped, not an error.
+        if not (is_numeric_dtype(df[col1]) and is_numeric_dtype(df[col2])):
+            continue
+        df[f"{col1}*{col2}"] = df[col1] * df[col2]
+    return df

hossam 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl

hossam 0.3.10py3-none-any.whl → 0.3.12py3-none-any.whl