hossam 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff compares the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- hossam/data_loader.py +7 -9
- hossam/hs_gis.py +17 -18
- hossam/hs_plot.py +207 -210
- hossam/hs_prep.py +29 -30
- hossam/hs_stats.py +54 -55
- hossam/hs_util.py +4 -6
- {hossam-0.4.4.dist-info → hossam-0.4.5.dist-info}/METADATA +1 -1
- hossam-0.4.5.dist-info/RECORD +16 -0
- hossam-0.4.4.dist-info/RECORD +0 -16
- {hossam-0.4.4.dist-info → hossam-0.4.5.dist-info}/WHEEL +0 -0
- {hossam-0.4.4.dist-info → hossam-0.4.5.dist-info}/licenses/LICENSE +0 -0
- {hossam-0.4.4.dist-info → hossam-0.4.5.dist-info}/top_level.txt +0 -0
hossam/hs_prep.py
CHANGED
@@ -5,6 +5,7 @@
 import joblib
 import numpy as np
 from itertools import combinations
+from typing import Any

 import pandas as pd
 import jenkspy
@@ -19,7 +20,7 @@ from .hs_util import pretty_table
 # Convert continuous variables via standardization (Z-score)
 # ===================================================================
 def standard_scaler(
-    data:
+    data: Any, yname: str | None = None, save_path: str | None = None, load_path: str | None = None
 ) -> DataFrame:
     """Performs Standard Scaling on continuous variables.

@@ -54,7 +55,7 @@ def standard_scaler(
         sdata = scaler.transform(arr) if load_path else scaler.fit_transform(arr)
         if save_path:
             joblib.dump(value=scaler, filename=save_path)
-        return sdata
+        return sdata  # type: ignore

     df = data.copy()

@@ -90,7 +91,7 @@ def standard_scaler(
 # Normalize continuous variables to values between 0 and 1
 # ===================================================================
 def minmax_scaler(
-    data:
+    data: Any, yname: str | None = None, save_path: str | None = None, load_path: str | None = None
 ) -> DataFrame:
     """Performs MinMax Scaling on continuous variables.

@@ -123,7 +124,7 @@ def minmax_scaler(
         sdata = scaler.transform(arr) if load_path else scaler.fit_transform(arr)
         if save_path:
             joblib.dump(scaler, save_path)
-        return sdata
+        return sdata  # type: ignore

     df = data.copy()

@@ -158,7 +159,7 @@ def minmax_scaler(
 # ===================================================================
 # Mark the specified columns as categorical data
 # ===================================================================
-def set_category(data: DataFrame, *args: str, columns: list = None) -> DataFrame:
+def set_category(data: DataFrame, *args: str, columns: list | None = None) -> DataFrame:
     """Sets the category data.

     Args:
@@ -173,7 +174,7 @@ def set_category(data: DataFrame, *args: str, columns: list = None) -> DataFrame
     if columns is not None:
         if args:
             raise ValueError("The args and columns arguments cannot be used together.")
-        args = columns
+        args = columns  # type: ignore

     df = data.copy()

@@ -226,7 +227,7 @@ def unmelt(
 # ===================================================================
 # Return an outlier table for the specified variables
 # ===================================================================
-def outlier_table(data: DataFrame, *fields: str, columns: list = None) -> DataFrame:
+def outlier_table(data: DataFrame, *fields: str, columns: list | None = None) -> DataFrame:
     """Computes quartile and IQR-based outlier bounds for numeric columns.

     If no `fields` are passed, all numeric columns of the data frame are used.
@@ -246,7 +247,7 @@ def outlier_table(data: DataFrame, *fields: str, columns: list = None) -> DataFr
     """
     # If the columns argument is given, it takes precedence over args.
     if columns is not None:
-        if args:
+        if args:  # type: ignore
             raise ValueError("The args and columns arguments cannot be used together.")
         args = columns

@@ -286,7 +287,7 @@ def outlier_table(data: DataFrame, *fields: str, columns: list = None) -> DataFr
 # ===================================================================
 # Replace outliers with a substitute value (NaN, 0) or the median
 # ===================================================================
-def replace_outliner(data: DataFrame, method: str = "nan", *fields: str, columns: list = None) -> DataFrame:
+def replace_outliner(data: DataFrame, method: str = "nan", *fields: str, columns: list | None = None) -> DataFrame:
    """Replaces data beyond the outlier bounds with the boundary values.

     Args:
@@ -305,7 +306,7 @@ def replace_outliner(data: DataFrame, method: str = "nan", *fields: str, columns
     """
     # If the columns argument is given, it takes precedence over args.
     if columns is not None:
-        if args:
+        if args:  # type: ignore
             raise ValueError("The args and columns arguments cannot be used together.")
         args = columns

@@ -354,7 +355,7 @@ def replace_outliner(data: DataFrame, method: str = "nan", *fields: str, columns
 # ===================================================================
 # Return a preprocessed data frame with the outliers removed
 # ===================================================================
-def drop_outliner(data: DataFrame, *fields: str, columns: list = None) -> DataFrame:
+def drop_outliner(data: DataFrame, *fields: str, columns: list | None = None) -> DataFrame:
     """Converts outliers to missing values, then drops them all.

     Args:
@@ -367,7 +368,7 @@ def drop_outliner(data: DataFrame, *fields: str, columns: list = None) -> DataFr
     """
     # If the columns argument is given, it takes precedence over args.
     if columns is not None:
-        if args:
+        if args:  # type: ignore
             raise ValueError("The args and columns arguments cannot be used together.")
         args = columns

@@ -378,7 +379,7 @@ def drop_outliner(data: DataFrame, *fields: str, columns: list = None) -> DataFr
 # ===================================================================
 # Convert categorical variables to dummy variables (one-hot encoding)
 # ===================================================================
-def get_dummies(data: DataFrame, *args: str, columns: list = None, drop_first: bool = True, dtype: str = "int") -> DataFrame:
+def get_dummies(data: DataFrame, *args: str, columns: list | None = None, drop_first: bool = True, dtype: str = "int") -> DataFrame:
     """Converts nominal variables to dummy variables.

     If column names are specified, only those columns are converted to dummy variables,
@@ -409,7 +410,7 @@ def get_dummies(data: DataFrame, *args: str, columns: list = None, drop_first: b
     if columns is not None:
         if args:
             raise ValueError("The args and columns arguments cannot be used together.")
-        args = columns
+        args = columns  # type: ignore

     if not args:
         # With no args, automatically select every non-numeric column
@@ -417,13 +418,13 @@ def get_dummies(data: DataFrame, *args: str, columns: list = None, drop_first: b
         for f in data.columns:
             if not pd.api.types.is_numeric_dtype(data[f]):
                 cols_to_convert.append(f)
-        args = cols_to_convert
+        args = cols_to_convert  # type: ignore
     else:
         # With args, use only those columns (checking they exist)
-        args = [c for c in args if c in data.columns]
+        args = [c for c in args if c in data.columns]  # type: ignore

     # Use pandas.get_dummies (no recursion issue)
-    return pd.get_dummies(data, columns=args, drop_first=drop_first, dtype=dtype) if args else data.copy()
+    return pd.get_dummies(data, columns=args, drop_first=drop_first, dtype=dtype) if args else data.copy()  # type: ignore


 # ===================================================================
@@ -630,7 +631,7 @@ def bin_continuous(
     if apply_labels:
         # Use numeric indexes (0, 1, 2, ...)
         numeric_labels = list(range(len(edges) - 1))
-        df[new_col] = pd.cut(series, bins=edges, labels=numeric_labels, include_lowest=True, ordered=False)
+        df[new_col] = pd.cut(series, bins=edges, labels=numeric_labels, include_lowest=True, ordered=False)  # type: ignore
     else:
         # Apply string labels
         if labels is None:
@@ -645,9 +646,9 @@ def bin_continuous(
                 except:
                     pass
                 auto_labels.append(f"{left}~{right}")
-            df[new_col] = pd.cut(series, bins=edges, labels=auto_labels, include_lowest=True, ordered=False)
+            df[new_col] = pd.cut(series, bins=edges, labels=auto_labels, include_lowest=True, ordered=False)  # type: ignore
         else:
-            df[new_col] = pd.cut(series, bins=edges, labels=labels, include_lowest=True, ordered=False)
+            df[new_col] = pd.cut(series, bins=edges, labels=labels, include_lowest=True, ordered=False)  # type: ignore

     df[new_col] = df[new_col].astype("category")
     return df
@@ -671,26 +672,24 @@ def bin_continuous(
         n_bins = len(edges) - 1
         if apply_labels:
             numeric_labels = list(range(n_bins))
-            df[new_col] = pd.cut(series, bins=edges, labels=numeric_labels, include_lowest=True, ordered=False)
+            df[new_col] = pd.cut(series, bins=edges, labels=numeric_labels, include_lowest=True, ordered=False)  # type: ignore
         else:
             if labels is None:
                 position_labels = [f"Q{i+1}" for i in range(n_bins)]
-                df[new_col] = pd.cut(
-                    series, bins=edges, labels=position_labels, include_lowest=True, ordered=False
-                )
+                df[new_col] = pd.cut(series, bins=edges, labels=position_labels, include_lowest=True, ordered=False)  # type: ignore
             else:
-                df[new_col] = pd.cut(series, bins=edges, labels=labels, include_lowest=True, ordered=False)
+                df[new_col] = pd.cut(series, bins=edges, labels=labels, include_lowest=True, ordered=False)  # type: ignore
         df[new_col] = df[new_col].astype("category")
         return df

     # Natural breaks (Jenks); fall back to quantiles if the dependency is missing
     if method_key in {"natural_breaks", "natural", "jenks"}:
         k = bins if isinstance(bins, int) and bins > 1 else 5
-        series_nonnull = series.dropna()
+        series_nonnull = series.dropna()  # type: ignore
         k = min(k, max(2, series_nonnull.nunique()))
         edges = None
         try:
-            edges = jenkspy.jenks_breaks(series_nonnull.to_list(), nb_class=k)
+            edges = jenkspy.jenks_breaks(series_nonnull.to_list(), nb_class=k)  # type: ignore
             edges[0] = -np.inf
             edges[-1] = np.inf
         except Exception:
@@ -730,7 +729,7 @@ def bin_continuous(
     if apply_labels:
         # Use numeric indexes
         numeric_labels = list(range(len(cut_edges) - 1))
-        df[new_col] = pd.cut(series, bins=cut_edges, labels=numeric_labels, include_lowest=True, ordered=False)
+        df[new_col] = pd.cut(series, bins=cut_edges, labels=numeric_labels, include_lowest=True, ordered=False)  # type: ignore
     else:
         if labels is None:
             auto_labels = []
@@ -744,9 +743,9 @@ def bin_continuous(
                 except:
                     pass
                 auto_labels.append(f"{left}~{right}")
-            df[new_col] = pd.cut(series, bins=cut_edges, labels=auto_labels, include_lowest=True, ordered=False)
+            df[new_col] = pd.cut(series, bins=cut_edges, labels=auto_labels, include_lowest=True, ordered=False)  # type: ignore
         else:
-            df[new_col] = pd.cut(series, bins=cut_edges, labels=labels, include_lowest=True, ordered=False)
+            df[new_col] = pd.cut(series, bins=cut_edges, labels=labels, include_lowest=True, ordered=False)  # type: ignore
     df[new_col] = df[new_col].astype("category")
     return df

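Almost every signature change in hs_prep.py follows one pattern: defaults written as `columns: list = None` rely on implicit Optional, which PEP 484 deprecates and strict checkers such as mypy and Pyright reject, so 0.4.5 spells the union out as `list | None`. The added `# type: ignore` comments cover the remaining complaints, mostly where a name bound by `*args: str` (a `tuple[str, ...]`) is rebound to a list. A minimal sketch of the pattern, assuming only what the diff shows (the function name `select_cols` is hypothetical, not from the package):

    from pandas import DataFrame

    # 0.4.4 style: implicit Optional; a strict checker rejects the default
    # because None is not a value of type "list".
    # def select_cols(data: DataFrame, *args: str, columns: list = None) -> DataFrame: ...

    # 0.4.5 style: None is part of the declared parameter type.
    def select_cols(data: DataFrame, *args: str, columns: list | None = None) -> DataFrame:
        if columns is not None:
            if args:
                raise ValueError("The args and columns arguments cannot be used together.")
            args = columns  # type: ignore  # rebinds a tuple[str, ...] name to a list
        return data[list(args)] if args else data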
hossam/hs_stats.py
CHANGED
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 from __future__ import annotations
-from typing import overload, Tuple, Literal, Union
+from typing import overload, Tuple, Literal, Union, Any

 # -------------------------------------------------------------
 import numpy as np
@@ -24,9 +24,9 @@ from scipy.stats import (
     normaltest,
     bartlett,
     levene,
-    ttest_1samp,
+    ttest_1samp,  # type: ignore
     ttest_ind as scipy_ttest_ind,
-    ttest_rel,
+    ttest_rel,  # type: ignore
     wilcoxon,
     pearsonr,
     spearmanr,
@@ -375,29 +375,29 @@ def describe(data: DataFrame, *fields: str, columns: list | None = None):
     outlier_rate = (outlier_count / len(data)) * 100

     # Classify the distribution shape (by skewness)
-    abs_skew = abs(skew)
-    if abs_skew < 0.5:
+    abs_skew = abs(skew)  # type: ignore
+    if abs_skew < 0.5:  # type: ignore
         dist = "almost symmetric"
-    elif abs_skew < 1.0:
-        if skew > 0:
+    elif abs_skew < 1.0:  # type: ignore
+        if skew > 0:  # type: ignore
             dist = "weak right tail"
         else:
             dist = "weak left tail"
-    elif abs_skew < 2.0:
-        if skew > 0:
+    elif abs_skew < 2.0:  # type: ignore
+        if skew > 0:  # type: ignore
             dist = "moderate right tail"
         else:
             dist = "moderate left tail"
     else:
-        if skew > 0:
+        if skew > 0:  # type: ignore
             dist = "extreme right tail"
         else:
             dist = "extreme left tail"

     # Assess the need for a log transform
-    if abs_skew < 0.5:
+    if abs_skew < 0.5:  # type: ignore
         log_need = "low"
-    elif abs_skew < 1.0:
+    elif abs_skew < 1.0:  # type: ignore
         log_need = "moderate"
     else:
         log_need = "high"
@@ -473,7 +473,7 @@ def category_describe(data: DataFrame, *fields: str):
     """
     if not fields:
         # Select nominal (categorical) columns: object, category, bool dtypes
-        fields = data.select_dtypes(include=['object', 'category', 'bool']).columns
+        fields = data.select_dtypes(include=['object', 'category', 'bool']).columns  # type: ignore

     result = []
     summary = []
@@ -730,7 +730,7 @@ def equal_var_test(data: DataFrame, columns: list | str | None = None, normal_di
     normality_result = normal_test(data[numeric_cols], method="n")
     # Check whether every column follows a normal distribution
     all_normal = normality_result["is_normal"].all()
-    normal_dist = all_normal
+    normal_dist = all_normal  # type: ignore

     try:
         if normal_dist:
@@ -829,7 +829,7 @@ def ttest_1samp(data, mean_value: float = 0.0) -> DataFrame:
     else:
         for a in alternative:
             try:
-                s, p = ttest_1samp(col_data, mean_value, alternative=a)
+                s, p = ttest_1samp(col_data, mean_value, alternative=a)  # type: ignore

                 itp = None

@@ -939,26 +939,26 @@ def ttest_ind(x, y, equal_var: bool | None = None) -> DataFrame:

     for a in alternative:
         try:
-            s, p = scipy_ttest_ind(x_data, y_data, equal_var=equal_var, alternative=a)
+            s, p = scipy_ttest_ind(x_data, y_data, equal_var=equal_var, alternative=a)  # type: ignore
             n = "t-test_ind" if equal_var else "Welch's t-test"

             # Interpret the test result
             itp = None

             if a == "two-sided":
-                itp = fmt.format("==" if p > 0.05 else "!=")
+                itp = fmt.format("==" if p > 0.05 else "!=")  # type: ignore
             elif a == "less":
-                itp = fmt.format(">=" if p > 0.05 else "<")
+                itp = fmt.format(">=" if p > 0.05 else "<")  # type: ignore
             else:
-                itp = fmt.format("<=" if p > 0.05 else ">")
+                itp = fmt.format("<=" if p > 0.05 else ">")  # type: ignore

             result.append({
                 "test": n,
                 "alternative": a,
-                "statistic": round(s, 3),
-                "p-value": round(p, 4),
-                "H0": p > 0.05,
-                "H1": p <= 0.05,
+                "statistic": round(s, 3),  # type: ignore
+                "p-value": round(p, 4),  # type: ignore
+                "H0": p > 0.05,  # type: ignore
+                "H1": p <= 0.05,  # type: ignore
                 "interpretation": itp,
                 "equal_var_checked": var_checked
             })
@@ -1068,7 +1068,7 @@ def ttest_rel(x, y, parametric: bool | None = None) -> DataFrame:
     for a in alternative:
         try:
             if parametric:
-                s, p = ttest_rel(x_data, y_data, alternative=a)
+                s, p = ttest_rel(x_data, y_data, alternative=a)  # type: ignore
                 n = "t-test_paired"
             else:
                 # Wilcoxon signed-rank test (non-parametric test for paired samples)
@@ -1078,19 +1078,19 @@ def ttest_rel(x, y, parametric: bool | None = None) -> DataFrame:
             itp = None

             if a == "two-sided":
-                itp = fmt.format("==" if p > 0.05 else "!=")
+                itp = fmt.format("==" if p > 0.05 else "!=")  # type: ignore
             elif a == "less":
-                itp = fmt.format(">=" if p > 0.05 else "<")
+                itp = fmt.format(">=" if p > 0.05 else "<")  # type: ignore
             else:
-                itp = fmt.format("<=" if p > 0.05 else ">")
+                itp = fmt.format("<=" if p > 0.05 else ">")  # type: ignore

             result.append({
                 "test": n,
                 "alternative": a,
-                "statistic": round(s, 3) if not np.isnan(s) else s,
-                "p-value": round(p, 4) if not np.isnan(p) else p,
-                "H0": p > 0.05,
-                "H1": p <= 0.05,
+                "statistic": round(s, 3) if not np.isnan(s) else s,  # type: ignore
+                "p-value": round(p, 4) if not np.isnan(p) else p,  # type: ignore
+                "H0": p > 0.05,  # type: ignore
+                "H1": p <= 0.05,  # type: ignore
                 "interpretation": itp,
                 "normality_checked": var_checked
             })
@@ -1117,7 +1117,7 @@ def ttest_rel(x, y, parametric: bool | None = None) -> DataFrame:
 # ===================================================================
 def vif_filter(
     data: DataFrame,
-    yname: str = None,
+    yname: str | None = None,
     ignore: list | None = None,
     threshold: float = 10.0,
     verbose: bool = False,
@@ -1182,7 +1182,7 @@ def vif_filter(
     for i, col in enumerate(X_clean.columns, start=0):
         # The first column of exog is the constant term, so the variable index is +1
         try:
-            vifs[col] = float(variance_inflation_factor(exog.values, i + 1))
+            vifs[col] = float(variance_inflation_factor(exog.values, i + 1))  # type: ignore
         except Exception:
             # On calculation failure, treat as infinity so it becomes a removal candidate first
             vifs[col] = float("inf")
@@ -1220,7 +1220,7 @@ def vif_filter(
 # ===================================================================
 # Compute the trend line for x, y data.
 # ===================================================================
-def trend(x:
+def trend(x: Any, y: Any, degree: int = 1, value_count: int = 100) -> Tuple[np.ndarray, np.ndarray]:
     """Computes the trend line for x, y data.

     Args:
@@ -1324,7 +1324,7 @@ def ols_report(fit, data, full=False, alpha=0.05) -> Union[
     for i, col in enumerate(indi_df.columns, start=1):  # the constant term is index 0, so start at 1
         try:
             with np.errstate(divide='ignore', invalid='ignore'):
-                vif_value = variance_inflation_factor(indi_df_const.values, i)
+                vif_value = variance_inflation_factor(indi_df_const.values, i)  # type: ignore
                 # Handle inf or very large values
                 if np.isinf(vif_value) or vif_value > 1e10:
                     vif_dict[col] = np.inf
@@ -1531,11 +1531,11 @@ def ols(df: DataFrame, yname: str, report: bool | str | int = False) -> Union[
         return linear_fit
     elif report == 1 or report == 'summary':
         # Summary report (full=False)
-        pdf, rdf = ols_report(linear_fit, df, full=False, alpha=0.05)
+        pdf, rdf = ols_report(linear_fit, df, full=False, alpha=0.05)  # type: ignore
         return linear_fit, pdf, rdf
     elif report == 2 or report == 'full' or report is True:
         # Full report (full=True)
-        pdf, rdf, result_report, model_report, variable_reports, equation_text = ols_report(linear_fit, df, full=True, alpha=0.05)
+        pdf, rdf, result_report, model_report, variable_reports, equation_text = ols_report(linear_fit, df, full=True, alpha=0.05)  # type: ignore
         return linear_fit, pdf, rdf, result_report, model_report, variable_reports, equation_text
     else:
         # Default: no report
@@ -1657,7 +1657,7 @@ def logit_report(
     vif_dict = {}
     x_const = sm.add_constant(x, has_constant="add")
     for i, col in enumerate(x.columns, start=1):  # the constant term is index 0, so start at 1
-        vif_dict[col] = variance_inflation_factor(x_const.values, i)
+        vif_dict[col] = variance_inflation_factor(x_const.values, i)  # type: ignore

     for idx, row in tbl.iterrows():
         name = idx
@@ -1770,7 +1770,7 @@ def logit(
         DataFrame,
         str,
         str,
-
+        list[str]
     ]
 ]:
     """Performs logistic regression analysis and returns the fitted result.
@@ -1838,13 +1838,13 @@ def logit(
         return logit_fit
     elif report == 1 or report == 'summary':
         # Summary report (full=False)
-        cdf, rdf = logit_report(logit_fit, df, threshold=0.5, full=False, alpha=0.05)
+        cdf, rdf = logit_report(logit_fit, df, threshold=0.5, full=False, alpha=0.05)  # type: ignore
         # The summary includes only result_report and variable_reports
         # Generate a simple version with only result and variable_reports
         return logit_fit, rdf
     elif report == 2 or report == 'full' or report is True:
         # Full report (full=True)
-        cdf, rdf, result_report, model_report, variable_reports, cm = logit_report(logit_fit, df, threshold=0.5, full=True, alpha=0.05)
+        cdf, rdf, result_report, model_report, variable_reports, cm = logit_report(logit_fit, df, threshold=0.5, full=True, alpha=0.05)  # type: ignore
         return logit_fit, cdf, rdf, result_report, model_report, variable_reports
     else:
         # Default: no report
@@ -1854,7 +1854,7 @@ def logit(
 # ===================================================================
 # Linearity Test
 # ===================================================================
-def ols_linearity_test(fit, power: int = 2, alpha: float = 0.05, plot: bool = False, title: str = None, save_path: str = None) -> DataFrame:
+def ols_linearity_test(fit, power: int = 2, alpha: float = 0.05, plot: bool = False, title: str | None = None, save_path: str | None = None) -> DataFrame:
     """Evaluates the linearity of a regression model with the Ramsey RESET test.

     Performs the Ramsey RESET (Regression Specification Error Test) on the fitted regression model and
@@ -1961,7 +1961,7 @@ def ols_linearity_test(fit, power: int = 2, alpha: float = 0.05, plot: bool = Fa
 # ===================================================================
 # Normality Test
 # ===================================================================
-def ols_normality_test(fit, alpha: float = 0.05, plot: bool = False, title: str = None, save_path: str = None) -> DataFrame:
+def ols_normality_test(fit, alpha: float = 0.05, plot: bool = False, title: str | None = None, save_path: str | None = None) -> DataFrame:
     """Tests the normality of the regression model's residuals.

     Evaluates whether the model residuals follow a normal distribution using the Shapiro-Wilk and Jarque-Bera tests.
@@ -2029,7 +2029,7 @@ def ols_normality_test(fit, alpha: float = 0.05, plot: bool = False, title: str
     # 2. Jarque-Bera test (always performed)
     try:
         stat_jb, p_jb = jarque_bera(residuals)
-        significant_jb = p_jb <= alpha
+        significant_jb = p_jb <= alpha  # type: ignore

         if significant_jb:
             interpretation_jb = f"Normality violated (p={p_jb:.4f} <= {alpha})"
@@ -2362,8 +2362,8 @@ def corr_pairwise(
         corr_val, pval = np.nan, np.nan

     # 4) Significance and strength
-    significant = False if np.isnan(pval) else pval <= alpha
-    abs_r = abs(corr_val) if not np.isnan(corr_val) else 0
+    significant = False if np.isnan(pval) else pval <= alpha  # type: ignore
+    abs_r = abs(corr_val) if not np.isnan(corr_val) else 0  # type: ignore
     if abs_r > 0.7:
         strength = "strong"
     elif abs_r > 0.3:
@@ -2530,13 +2530,13 @@ def oneway_anova(data: DataFrame, dv: str, between: str, alpha: float = 0.05) ->
     anova_df['significant'] = anova_df['p-unc'] <= alpha

     # Check whether the ANOVA result is significant
-    p_unc = float(anova_df.loc[0, 'p-unc'])
+    p_unc = float(anova_df.loc[0, 'p-unc'])  # type: ignore
     anova_significant = p_unc <= alpha

     # Build the ANOVA report sentence
     def _safe_get(col: str, default: float = np.nan) -> float:
         try:
-            return float(anova_df.loc[0, col]) if col in anova_df.columns else default
+            return float(anova_df.loc[0, col]) if col in anova_df.columns else default  # type: ignore
         except Exception:
             return default

@@ -2851,7 +2851,7 @@ def predict(fit, data: DataFrame | Series) -> DataFrame | Series | float:

     # For Series input, return a single value
     if is_series:
-        return float(predictions.iloc[0])
+        return float(predictions.iloc[0])  # type: ignore

     # For DataFrame input
     if isinstance(data, DataFrame):
@@ -2924,8 +2924,7 @@ def corr_effect_size(data: DataFrame, dv: str, *fields: str, alpha: float = 0.05

     # If fields is not given, use all numeric columns except dv
     if not fields:
-        fields = [col for col in data.columns
-                  if is_numeric_dtype(data[col]) and col != dv]
+        fields = [col for col in data.columns if is_numeric_dtype(data[col]) and col != dv]  # type: ignore

     # Check that dv is numeric
     if not is_numeric_dtype(data[dv]):
@@ -2953,8 +2952,8 @@ def corr_effect_size(data: DataFrame, dv: str, *fields: str, alpha: float = 0.05
     normal_y_result = normal_test(data[[dv]], columns=[dv], method=method_y)

     # Normality decision (assume normal if p > alpha)
-    normal_x = normal_x_result.loc[var, 'p-val'] > alpha if var in normal_x_result.index else False
-    normal_y = normal_y_result.loc[dv, 'p-val'] > alpha if dv in normal_y_result.index else False
+    normal_x = normal_x_result.loc[var, 'p-val'] > alpha if var in normal_x_result.index else False  # type: ignore
+    normal_y = normal_y_result.loc[dv, 'p-val'] > alpha if dv in normal_y_result.index else False  # type: ignore

     # Pearson (both normal) vs Spearman (either non-normal)
     if normal_x and normal_y:
@@ -2966,8 +2965,8 @@ def corr_effect_size(data: DataFrame, dv: str, *fields: str, alpha: float = 0.05

     # Compute Cohen's d (convert correlation to effect size)
     # d = 2*r / sqrt(1-r^2)
-    if r**2 < 1:
-        d = (2 * r) / np.sqrt(1 - r**2)
+    if r ** 2 < 1:  # type: ignore
+        d = (2 * r) / np.sqrt(1 - r ** 2)  # type: ignore
     else:
         d = 0

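The hs_stats.py changes are the same two moves: explicit `str | None` defaults and `# type: ignore` on calls into scipy and statsmodels whose stubs do not match how the code uses the results (for example, `ttest_1samp` returns a result object that the code unpacks like a plain `(statistic, pvalue)` tuple). A small sketch of the two options, assuming only public scipy.stats behavior (not the package's code):

    import numpy as np
    from scipy.stats import ttest_1samp

    sample = np.array([1.2, 0.8, 1.1, 0.9, 1.3])

    # What 0.4.5 does: keep tuple unpacking, which works at runtime,
    # and silence the checker with a comment.
    s, p = ttest_1samp(sample, popmean=1.0, alternative="two-sided")  # type: ignore

    # An ignore-free alternative: read the named fields of the result object.
    res = ttest_1samp(sample, popmean=1.0, alternative="two-sided")
    s2, p2 = float(res.statistic), float(res.pvalue)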
hossam/hs_util.py
CHANGED
@@ -122,11 +122,9 @@ def pretty_table(data: DataFrame, tablefmt="simple", headers: str = "keys") -> N
     ```
     """

-    tabulate.WIDE_CHARS_MODE = False
+    tabulate.WIDE_CHARS_MODE = False  # type: ignore
     print(
-        tabulate(
-            data, headers=headers, tablefmt=tablefmt, showindex=True, numalign="right"
-        )
+        tabulate(data, headers=headers, tablefmt=tablefmt, showindex=True, numalign="right")  # type: ignore
     )


@@ -167,7 +165,7 @@ def __data_info(

     if info:
         print("\n✅ Table info")
-        pretty_table(data.info(), tablefmt="pretty")
+        pretty_table(data.info(), tablefmt="pretty")  # type: ignore

     print("\n✅ Top 5 rows")
     pretty_table(data.head(), tablefmt="pretty")
@@ -229,7 +227,7 @@ def load_data(key: str,
     elif k.endswith(".csv"):
         origin = read_csv(key)
     else:
-        origin = _load_data_remote(key, local)
+        origin = _load_data_remote(key, local)  # type: ignore

     if origin is None:
         raise RuntimeError("Data loading failed: origin is None")
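One hs_util.py suppression is worth calling out: `pandas.DataFrame.info()` prints its report to stdout and returns None, so `pretty_table(data.info(), tablefmt="pretty")` actually tabulates None; the new `# type: ignore` hides the type mismatch rather than fixing it. The underlying fact, in plain pandas:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]})
    out = df.info()      # the report is printed here as a side effect
    print(out is None)   # True: there is nothing left to tabulate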
hossam-0.4.5.dist-info/RECORD
ADDED
@@ -0,0 +1,16 @@
+hossam/NotoSansKR-Regular.ttf,sha256=0SCufUQwcVWrWTu75j4Lt_V2bgBJIBXl1p8iAJJYkVY,6185516
+hossam/__init__.py,sha256=OkMeP15jt6aCy7QNXMtkO0YRVvgOQYumkb7GuVKrbcs,2712
+hossam/data_loader.py,sha256=K0-MJaVeedF5x8mSp22X2rD_CZ-T185EhoUFEqzP8Ss,6352
+hossam/hs_classroom.py,sha256=rgayol3U5PSo4rLfdbClfiAtG21bFrASaSW56PUsjus,27144
+hossam/hs_gis.py,sha256=DVmndBK-_7GMK3J1_on3ieEQk1S0MfUZ8_wlX-cDdZQ,11581
+hossam/hs_plot.py,sha256=3j9B69pl-zQM_09lTXxLKAMaDM0vwOTsUWbzcU8hCK8,86228
+hossam/hs_prep.py,sha256=kCmFxnMyFZ5tLUfoE8msbwTracajHAmruJbFj6A6eIU,38020
+hossam/hs_stats.py,sha256=uGYkEk8Rb8qMoZ5FiZ7Yg6jssLIGl_EBbmwvvSYljhQ,115780
+hossam/hs_timeserise.py,sha256=gSj3cPgOGLOZEXhfW1anXbwpoJja847ZY9F8l9piJPE,42601
+hossam/hs_util.py,sha256=xuNXC6FJSAmyAbcRAUMsigCKHXM25t3H90nFMgq7IBs,8482
+hossam/leekh.png,sha256=1PB5NQ24SDoHA5KMiBBsWpSa3iniFcwFTuGwuOsTHfI,6395
+hossam-0.4.5.dist-info/licenses/LICENSE,sha256=nIqzhlcFY_2D6QtFsYjwU7BWkafo-rUJOQpDZ-DsauI,941
+hossam-0.4.5.dist-info/METADATA,sha256=HM5qrrvaFZWAyUlhgV_BLPHAcxEZdZ4gp2p3V4X4pzo,3676
+hossam-0.4.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hossam-0.4.5.dist-info/top_level.txt,sha256=_-7bwjhthHplWhywEaHIJX2yL11CQCaLjCNSBlk6wiQ,7
+hossam-0.4.5.dist-info/RECORD,,
hossam-0.4.4.dist-info/RECORD
DELETED
@@ -1,16 +0,0 @@
-hossam/NotoSansKR-Regular.ttf,sha256=0SCufUQwcVWrWTu75j4Lt_V2bgBJIBXl1p8iAJJYkVY,6185516
-hossam/__init__.py,sha256=OkMeP15jt6aCy7QNXMtkO0YRVvgOQYumkb7GuVKrbcs,2712
-hossam/data_loader.py,sha256=oUIsqbHQoRiHA_1tdElDaYo1ipmUB5fYSXYMB5gLOl0,6395
-hossam/hs_classroom.py,sha256=rgayol3U5PSo4rLfdbClfiAtG21bFrASaSW56PUsjus,27144
-hossam/hs_gis.py,sha256=DLogaf5nxJBbG-d8QoH2g8UfZ1omMtmEXDYgNg8jtT0,11410
-hossam/hs_plot.py,sha256=tsJMi2q9SzHRSs25dXsHkkImW-Jk7su1M6TbKwX9koU,83887
-hossam/hs_prep.py,sha256=ocZNGzHzqgasVNLcb_LClTZaAeTYiIg4mzrixeEzBQU,37693
-hossam/hs_stats.py,sha256=LpUG8U9ybnh6qSMW2SKCSDJZTeMhLH2xH2Pj4i7U6TU,114889
-hossam/hs_timeserise.py,sha256=gSj3cPgOGLOZEXhfW1anXbwpoJja847ZY9F8l9piJPE,42601
-hossam/hs_util.py,sha256=8byLj_VR93vS__lyf0xgQKArgMy9qFm2VvZVSCxfQX0,8444
-hossam/leekh.png,sha256=1PB5NQ24SDoHA5KMiBBsWpSa3iniFcwFTuGwuOsTHfI,6395
-hossam-0.4.4.dist-info/licenses/LICENSE,sha256=nIqzhlcFY_2D6QtFsYjwU7BWkafo-rUJOQpDZ-DsauI,941
-hossam-0.4.4.dist-info/METADATA,sha256=R6qOrcnZhbTzUrRK2x9vNksDjw8rVK1DVZrbRIPSPQQ,3676
-hossam-0.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-hossam-0.4.4.dist-info/top_level.txt,sha256=_-7bwjhthHplWhywEaHIJX2yL11CQCaLjCNSBlk6wiQ,7
-hossam-0.4.4.dist-info/RECORD,,
{hossam-0.4.4.dist-info → hossam-0.4.5.dist-info}/WHEEL
File without changes

{hossam-0.4.4.dist-info → hossam-0.4.5.dist-info}/licenses/LICENSE
File without changes

{hossam-0.4.4.dist-info → hossam-0.4.5.dist-info}/top_level.txt
File without changes