hossam 0.4.5__py3-none-any.whl → 0.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hossam/__init__.py +2 -1
- hossam/hs_classroom.py +30 -30
- hossam/hs_plot.py +137 -147
- hossam/hs_prep.py +7 -1
- hossam/hs_stats.py +1570 -1459
- hossam/hs_timeserise.py +38 -39
- hossam/hs_util.py +198 -1
- {hossam-0.4.5.dist-info → hossam-0.4.6.dist-info}/METADATA +1 -1
- hossam-0.4.6.dist-info/RECORD +15 -0
- hossam/data_loader.py +0 -203
- hossam-0.4.5.dist-info/RECORD +0 -16
- {hossam-0.4.5.dist-info → hossam-0.4.6.dist-info}/WHEEL +0 -0
- {hossam-0.4.5.dist-info → hossam-0.4.6.dist-info}/licenses/LICENSE +0 -0
- {hossam-0.4.5.dist-info → hossam-0.4.6.dist-info}/top_level.txt +0 -0
hossam/hs_timeserise.py
CHANGED
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 # -------------------------------------------------------------
+from typing import Callable
 import numpy as np
 import datetime as dt
 import concurrent.futures as futures
@@ -13,7 +14,7 @@ from matplotlib import pyplot as plt
 
 # -------------------------------------------------------------
 from statsmodels.tsa.stattools import adfuller
-from statsmodels.tsa.seasonal import seasonal_decompose
+from statsmodels.tsa.seasonal import seasonal_decompose  # type: ignore
 from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
 from statsmodels.tsa.arima.model import ARIMA
 
@@ -40,10 +41,10 @@ def diff(
     data: DataFrame,
     yname: str,
     plot: bool = True,
-    max_diff: int = None,
+    max_diff: int | None = None,
     figsize: tuple = (10, 5),
     dpi: int = 100,
-) ->
+) -> DataFrame:
     """Tests the stationarity of a time series and achieves stationarity through differencing.
 
     Uses the ADF (Augmented Dickey-Fuller) test to check the stationarity of the time-series data.
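
Most of the signature changes in this release follow one pattern: implicit-Optional defaults such as `max_diff: int = None` become explicit PEP 604 unions (`int | None`), which is what strict type checkers expect. A minimal sketch of the distinction, using a hypothetical helper name:

```python
# Implicit Optional: `None` is not an `int`, so e.g. mypy in strict mode rejects this.
# def diff(max_diff: int = None): ...

# Explicit union, as adopted throughout 0.4.6:
def diff_limit(max_diff: int | None = None) -> str:
    """Hypothetical helper: describe the differencing limit."""
    return "unbounded" if max_diff is None else f"at most {max_diff} pass(es)"

print(diff_limit())   # unbounded
print(diff_limit(2))  # at most 2 pass(es)
```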
@@ -55,7 +56,7 @@ def diff(
         yname (str): Target column name for the stationarity test and differencing.
         plot (bool, optional): Whether to display a time-series plot at each differencing step.
             Defaults to True.
-        max_diff (int, optional): Limit on the number of differencing passes. If None, repeats until stationarity is satisfied.
+        max_diff (int | None, optional): Limit on the number of differencing passes. If None, repeats until stationarity is satisfied.
             Setting it is recommended to prevent over-differencing. Defaults to None.
         figsize (tuple, optional): Plot size (width, height). Defaults to (10, 5).
         dpi (int, optional): Plot resolution. Defaults to 100.
@@ -106,7 +107,7 @@ def diff(
         df = df.diff().dropna()
 
         if plot:
-            lineplot(df=df, yname=yname, xname=df.index, figsize=figsize, dpi=dpi)
+            lineplot(df=df, yname=yname, xname=df.index, figsize=figsize, dpi=dpi)  # type: ignore
 
     # ADF Test
     ar = adfuller(df[yname])
@@ -118,7 +119,7 @@ def diff(
         "관측치 개수(num of observations)": [ar[3]],
     }
 
-    for key, value in ar[4].items():
+    for key, value in ar[4].items():  # type: ignore
         ardict["기각값(Critical Values) %s" % key] = value
 
     stationarity = ar[1] <= 0.05
@@ -131,7 +132,7 @@ def diff(
         count += 1
 
         # Stop when a maximum differencing count is set and the iteration count reaches it
-        if max_diff and count == max_diff:
+        if max_diff is not None and count == max_diff:
            break
 
    return df
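
The loop patched above keeps differencing until the ADF p-value reaches 0.05 or the cap is hit; the new `is not None` guard also reads correctly now that `max_diff` is typed `int | None`. A standalone sketch of the same idea on synthetic data, using statsmodels directly rather than the package's `diff`:

```python
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# A random walk is non-stationary until differenced once.
rng = np.random.default_rng(42)
s = pd.Series(np.cumsum(rng.normal(size=500)))

count, max_diff = 0, 3  # cap mirrors the max_diff parameter
while adfuller(s)[1] > 0.05:  # adfuller()[1] is the p-value
    s = s.diff().dropna()
    count += 1
    if max_diff is not None and count == max_diff:
        break

print(f"differenced {count} time(s); p-value = {adfuller(s)[1]:.4f}")
```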
@@ -190,8 +191,8 @@ def rolling(
 
     lineplot(
         df=df,
-        yname=rolling.name,
-        xname=df.index,
+        yname=rolling.name,  # type: ignore
+        xname=df.index,  # type: ignore
         figsize=figsize,
         dpi=dpi,
         callback=lambda ax: ax.set_title(f"Rolling (window={window})"),
@@ -250,8 +251,8 @@ def ewm(
 
     lineplot(
         df=df,
-        yname=ewm.name,
-        xname=df.index,
+        yname=ewm.name,  # type: ignore
+        xname=df.index,  # type: ignore
         figsize=figsize,
         dpi=dpi,
         callback=lambda ax: ax.set_title(f"Ewm (span={span})"),
@@ -357,7 +358,7 @@ def seasonal_decompose(
 # ===================================================================
 # Train/test split for time-series data
 # ===================================================================
-def train_test_split(data: DataFrame, test_size: float = 0.2) -> tuple:
+def train_test_split(data: DataFrame, test_size: float = 0.2) -> tuple[DataFrame, DataFrame]:
     """Splits time-series data into train/test sets while preserving time order.
 
     Unlike an ordinary random split, the split strictly preserves the time order.
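
The sharpened return annotation documents that the split yields two frames in chronological order. A sketch of what such a split typically looks like (hypothetical `time_split`, not the package's implementation, which is unknown beyond its signature):

```python
import pandas as pd

def time_split(data: pd.DataFrame, test_size: float = 0.2) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Chronological split: the trailing test_size fraction becomes the test set."""
    cut = int(len(data) * (1 - test_size))
    return data.iloc[:cut], data.iloc[cut:]

df = pd.DataFrame({"y": range(10)}, index=pd.date_range("2024-01-01", periods=10))
train, test = time_split(df)
print(len(train), len(test))  # 8 2
```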
@@ -411,7 +412,7 @@ def train_test_split(data: DataFrame, test_size: float = 0.2) -> tuple:
 # Autocorrelation function (ACF) plot visualization
 # ===================================================================
 def acf_plot(
-    data: Series, figsize: tuple = (10, 5), dpi: int = 100, callback:
+    data: Series, figsize: tuple = (10, 5), dpi: int = 100, callback: Callable | None = None
 ):
     """Visualizes the autocorrelation function (ACF) plot.
 
@@ -463,7 +464,7 @@ def acf_plot(
 # Partial autocorrelation function (PACF) plot visualization
 # ===================================================================
 def pacf_plot(
-    data: Series, figsize: tuple = (10, 5), dpi: int = 100, callback:
+    data: Series, figsize: tuple = (10, 5), dpi: int = 100, callback: Callable | None = None
 ):
     """Visualizes the partial autocorrelation function (PACF) plot.
 
@@ -515,7 +516,7 @@ def pacf_plot(
 # Combined ACF and PACF plot visualization
 # ===================================================================
 def acf_pacf_plot(
-    data: Series, figsize: tuple = (10, 5), dpi: int = 100, callback:
+    data: Series, figsize: tuple = (10, 5), dpi: int = 100, callback: Callable | None = None
 ):
     """Visualizes the ACF and PACF plots together to determine the ARIMA orders.
 
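
All three plot helpers now type the hook as `Callable | None`: a callback that receives the Matplotlib Axes after drawing. A self-contained sketch of that pattern (the helper name is hypothetical):

```python
from typing import Callable
import numpy as np
from matplotlib import pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf

def acf_sketch(data, callback: Callable | None = None) -> None:
    """Draw an ACF plot, then hand the Axes to an optional callback."""
    fig, ax = plt.subplots(figsize=(10, 5))
    plot_acf(data, ax=ax)
    if callback is not None:  # the None default makes the hook opt-in
        callback(ax)
    plt.show()

acf_sketch(
    np.random.default_rng(0).normal(size=200),
    callback=lambda ax: ax.set_title("ACF (custom title)"),
)
```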
@@ -577,7 +578,7 @@ def arima(
     p: int = 3,
     d: int = 3,
     q: int = 3,
-    s: int = None,
+    s: int | None = None,
     periods: int = 0,
     figsize: tuple = (15, 5),
     dpi: int = 100,
@@ -600,7 +601,7 @@ def arima(
             Decide by consulting the diff() results. Defaults to 3.
         q (int, optional): MA (Moving Average) order. Models the influence of past errors.
             Decide by consulting the ACF plot. Used as max_q when auto=True. Defaults to 3.
-        s (int, optional): Seasonal period. None means a non-seasonal ARIMA.
+        s (int | None, optional): Seasonal period. None means a non-seasonal ARIMA.
             e.g. s=12 for monthly data, s=52 for weekly data.
             When set, a SARIMA(p,d,q)(P,D,Q,s) model is used. Defaults to None.
         periods (int, optional): Number of additional forecast periods after the test range.
@@ -692,17 +693,15 @@ def arima(
     fig = plt.figure(figsize=figsize, dpi=dpi)
     ax = fig.gca()
 
-    sb.lineplot(data=train, x=train.index, y=train.columns[0], label="Train", ax=ax)
-    sb.lineplot(data=test, x=test.index, y=test.columns[0], label="Test", ax=ax)
+    sb.lineplot(data=train, x=train.index, y=train.columns[0], label="Train", ax=ax)  # type: ignore
+    sb.lineplot(data=test, x=test.index, y=test.columns[0], label="Test", ax=ax)  # type: ignore
 
     if auto:
         sb.lineplot(
             x=pred.index, y=pred.values, label="Prediction", linestyle="--", ax=ax
         )
     else:
-        sb.lineplot(
-            x=test_pred.index, y=test_pred, label="Prediction", linestyle="--", ax=ax
-        )
+        sb.lineplot(x=test_pred.index, y=test_pred, label="Prediction", linestyle="--", ax=ax)  # type: ignore
         sb.lineplot(x=pred.index, y=pred, label="Forecast", linestyle="--", ax=ax)
 
     ax.grid()
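
Per the updated docstring, `s: int | None` toggles between plain ARIMA and SARIMA. In statsmodels this corresponds to passing a `seasonal_order`; a sketch on synthetic monthly data (the orders are illustrative, not the package's defaults):

```python
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

y = pd.Series(
    np.random.default_rng(1).normal(size=120),
    index=pd.date_range("2015-01-01", periods=120, freq="MS"),
)

s = 12  # monthly seasonality; None would mean non-seasonal ARIMA
if s is not None:
    model = ARIMA(y, order=(1, 1, 1), seasonal_order=(1, 0, 1, s))  # SARIMA
else:
    model = ARIMA(y, order=(1, 1, 1))

print(model.fit().aic)
```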
@@ -719,10 +718,10 @@ def arima(
 # ===================================================================
 def __prophet_execute(
     train: DataFrame,
-    test: DataFrame = None,
+    test: DataFrame | None = None,
     periods: int = 0,
     freq: str = "D",
-    callback:
+    callback: Callable | None = None,
     **params,
 ):
     """Creates a Prophet model.
@@ -732,7 +731,7 @@ def __prophet_execute(
         test (DataFrame, optional): Validation data. Defaults to None.
         periods (int, optional): Forecast horizon. Defaults to 0.
         freq (str, optional): Forecast frequency (D, M, Y). Defaults to "D".
-        callback (
+        callback (Callable, optional): Callback function. Defaults to None.
         **params (dict, optional): Hyperparameters. Defaults to None.
 
     Returns:
@@ -753,10 +752,10 @@ def __prophet_execute(
 
     if test is not None:
         pred = forecast[["ds", "yhat"]][-size:]
-        score = np.sqrt(mean_squared_error(test["y"].values, pred["yhat"].values))
+        score = np.sqrt(mean_squared_error(test["y"].values, pred["yhat"].values))  # type: ignore
     else:
         pred = forecast[["ds", "yhat"]]
-        score = np.sqrt(mean_squared_error(train["y"].values, pred["yhat"].values))
+        score = np.sqrt(mean_squared_error(train["y"].values, pred["yhat"].values))  # type: ignore
 
     return model, score, dict(params), forecast, pred
 
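
`__prophet_execute` scores each candidate by RMSE over the validation window (or over the training data when no test set is given). The surrounding Prophet flow, reduced to a standalone sketch on synthetic data (requires the `prophet` package):

```python
import numpy as np
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error

df = pd.DataFrame({
    "ds": pd.date_range("2023-01-01", periods=100, freq="D"),  # Prophet requires ds/y columns
    "y": np.sin(np.arange(100) / 7) + np.random.default_rng(0).normal(scale=0.1, size=100),
})
train, test = df.iloc[:80], df.iloc[80:]

model = Prophet()
model.fit(train)
future = model.make_future_dataframe(periods=len(test), freq="D")
forecast = model.predict(future)

pred = forecast[["ds", "yhat"]].iloc[-len(test):]
rmse = np.sqrt(mean_squared_error(test["y"].values, pred["yhat"].values))
print(f"RMSE: {rmse:.3f}")
```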
@@ -766,16 +765,16 @@ def __prophet_execute(
 # ===================================================================
 def prophet(
     train: DataFrame,
-    test: DataFrame = None,
+    test: DataFrame | None = None,
     periods: int = 0,
     freq: str = "D",
     report: bool = True,
     print_forecast: bool = False,
     figsize=(20, 8),
     dpi: int = 200,
-    callback:
+    callback: Callable | None = None,
     **params,
-) -> DataFrame:
+) -> tuple[Prophet, dict, float, DataFrame, DataFrame]:
     """Trains a Facebook Prophet model and returns the best one.
 
     Builds a time-series forecasting model using Facebook (Meta)'s Prophet library.
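
The corrected return annotation says `prophet()` yields a 5-tuple rather than a bare DataFrame. A hedged usage sketch, assuming the tuple order matches the annotation (model, best params, best score, forecast, prediction); that order is not confirmed by this diff alone:

```python
import numpy as np
import pandas as pd
from hossam.hs_timeserise import prophet, train_test_split  # module names as shipped in this wheel

df = pd.DataFrame({
    "ds": pd.date_range("2024-01-01", periods=120, freq="D"),
    "y": np.arange(120, dtype=float),
})
train, test = train_test_split(df, test_size=0.2)

# Unpacking follows tuple[Prophet, dict, float, DataFrame, DataFrame] (assumed order).
model, best_params, best_score, forecast, pred = prophet(
    train, test=test, periods=0, freq="D", report=False
)
print(best_score)
```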
@@ -897,7 +896,7 @@ def prophet(
 
     else:
         m, score, params, forecast, pred = __prophet_execute(
-            train=train, test=test, periods=periods, freq=freq, callback=callback, **p
+            train=train, test=test, periods=periods, freq=freq, callback=callback, **p  # type: ignore
         )
         result.append(
             {
@@ -925,7 +924,7 @@ def prophet(
     # )
 
     if report:
-        hs_prophet_report(
+        hs_prophet_report(  # type: ignore
             best_model, best_forecast, best_pred, test, print_forecast, figsize, dpi
         )
 
@@ -939,11 +938,11 @@ def prophet_report(
     model: Prophet,
     forecast: DataFrame,
     pred: DataFrame,
-    test: DataFrame = None,
+    test: DataFrame | None = None,
     print_forecast: bool = False,
     figsize: tuple = (20, 8),
     dpi: int = 100,
-) ->
+) -> None:
     """Visualizes the Prophet model's forecast and component decomposition and evaluates performance.
 
     Visualizes the trained Prophet model's forecast, changepoints, and confidence intervals, and
@@ -1017,7 +1016,7 @@ def prophet_report(
         linestyle="--",
     )
 
-    ax.set_ylim([forecast["yhat"].min() * 0.95, forecast["yhat"].max() * 1.05])
+    ax.set_ylim([forecast["yhat"].min() * 0.95, forecast["yhat"].max() * 1.05])  # type: ignore
 
     plt.legend()
     plt.show()
@@ -1041,9 +1040,9 @@ def prophet_report(
     y = test["y"].values
 
     result = {
-        "평균절대오차(MAE)": mean_absolute_error(y, yhat),
-        "평균제곱오차(MSE)": mean_squared_error(y, yhat),
-        "평균오차(RMSE)": np.sqrt(mean_squared_error(y, yhat))
+        "평균절대오차(MAE)": mean_absolute_error(y, yhat),  # type: ignore
+        "평균제곱오차(MSE)": mean_squared_error(y, yhat),  # type: ignore
+        "평균오차(RMSE)": np.sqrt(mean_squared_error(y, yhat))  # type: ignore
     }
 
     pretty_table(DataFrame(result, index=["Prophet"]).T)
@@ -1052,7 +1051,7 @@ def prophet_report(
 # ===================================================================
 # Create a holiday DataFrame including weekend dates
 # ===================================================================
-def get_weekend_df(start:
+def get_weekend_df(start: dt.datetime | str, end: dt.datetime | str | None = None) -> DataFrame:
     """Creates a holiday DataFrame that includes weekend dates.
 
     Weekend holiday entries in a format usable for the Prophet model's holidays parameter
hossam/hs_util.py
CHANGED
@@ -1,5 +1,7 @@
 # -*- coding: utf-8 -*-
 # -------------------------------------------------------------
+import requests
+import json
 from typing import TYPE_CHECKING
 from importlib.metadata import distributions
 import pandas as pd
@@ -7,8 +9,203 @@ import numpy as np
 from pandas import DataFrame, DatetimeIndex, read_csv, read_excel
 from scipy.stats import normaltest
 from tabulate import tabulate
+from os.path import join, exists
+from io import BytesIO
+from pandas import DataFrame, read_csv, read_excel
+from typing import Optional, Tuple, Any
 
-
+BASE_URL = "https://data.hossam.kr"
+
+# -------------------------------------------------------------
+def __get_df(path: str, index_col=None) -> DataFrame:
+    p = path.rfind(".")
+    exec = path[p+1:].lower()
+
+    if exec == 'xlsx':
+        # If path is a remote URL, fetch the file once and reuse the bytes
+        if path.lower().startswith(('http://', 'https://')):
+            path = path.replace("\\", "/")
+            with requests.Session() as session:
+                r = session.get(path)
+
+                if r.status_code != 200:
+                    raise Exception(f"HTTP {r.status_code} Error - {r.reason} > {path}")
+
+                data_bytes = r.content
+
+            # Use separate BytesIO objects for each read to avoid pointer/stream issues
+            df = read_excel(BytesIO(data_bytes), index_col=index_col)
+
+            try:
+                info = read_excel(BytesIO(data_bytes), sheet_name='metadata', index_col=0)
+                #print("\033[94m[metadata]\033[0m")
+                print()
+                pretty_table(info)
+                print()
+            except Exception:
+                #print(f"\033[91m[!] Cannot read metadata\033[0m")
+                pass
+        else:
+            df = read_excel(path, index_col=index_col)
+
+            try:
+                info = read_excel(path, sheet_name='metadata', index_col=0)
+                #print("\033[94m[metadata]\033[0m")
+                print()
+                pretty_table(info)
+                print()
+            except:
+                #print(f"\033[91m[!] Cannot read metadata\033[0m")
+                pass
+    else:
+        df = read_csv(path, index_col=index_col)
+
+    return df
+
+# -------------------------------------------------------------
+def __get_data_url(key: str, local: str | None = None) -> Tuple[str, Any, Any]:
+    global BASE_URL
+
+    path = None
+
+    if not local:
+        data_path = join(BASE_URL, "metadata.json").replace("\\", "/")
+
+        with requests.Session() as session:
+            r = session.get(data_path)
+
+            if r.status_code != 200:
+                raise Exception("[%d Error] %s" % (r.status_code, r.reason))
+
+            my_dict = r.json()
+
+        info = my_dict.get(key.lower())
+
+        if not info:
+            raise FileNotFoundError("%s는 존재하지 않는 데이터에 대한 요청입니다." % key)
+
+        path = join(BASE_URL, info['url'])
+    else:
+        data_path = join(local, "metadata.json")
+
+        if not exists(data_path):
+            raise FileNotFoundError("존재하지 않는 데이터에 대한 요청입니다.")
+
+        with open(data_path, "r", encoding="utf-8") as f:
+            my_dict = json.loads(f.read())
+
+        info = my_dict.get(key.lower())
+        path = join(local, info['url'])
+
+    return path, info.get('desc'), info.get('index')
+
+# -------------------------------------------------------------
+def load_info(search: str | None = None, local: str | None = None) -> DataFrame:
+    """Loads information about the available datasets from the metadata.
+
+    Args:
+        search (str, optional): Name filter string. Only entries containing it are returned.
+        local (str, optional): Local metadata path. Uses the remote BASE_URL when None.
+
+    Returns:
+        DataFrame: Table with name, chapter, desc, and url columns
+
+    Examples:
+        ```python
+        from hossam import *
+        info = load_info()
+        list(info.columns)  # ['name', 'chapter', 'desc', 'url']
+        ```
+    """
+    global BASE_URL
+
+    path = None
+
+    if not local:
+        data_path = join(BASE_URL, "metadata.json").replace("\\", "/")
+
+        with requests.Session() as session:
+            r = session.get(data_path)
+
+            if r.status_code != 200:
+                raise Exception("[%d Error] %s ::: %s" % (r.status_code, r.reason, data_path))
+
+            my_dict = r.json()
+    else:
+        data_path = join(local, "metadata.json")
+
+        if not exists(data_path):
+            raise FileNotFoundError("존재하지 않는 데이터에 대한 요청입니다.")
+
+        with open(data_path, "r", encoding="utf-8") as f:
+            my_dict = json.loads(f.read())
+
+    my_data = []
+    for key in my_dict:
+        if 'index' in my_dict[key]:
+            del my_dict[key]['index']
+
+        my_dict[key]['url'] = "%s/%s" % (BASE_URL, my_dict[key]['url'])
+        my_dict[key]['name'] = key
+
+        if 'chapter' in my_dict[key]:
+            my_dict[key]['chapter'] = ", ".join(my_dict[key]['chapter'])
+        else:
+            my_dict[key]['chapter'] = '공통'
+
+        my_data.append(my_dict[key])
+
+    my_df = DataFrame(my_data)
+    my_df2 = my_df.reindex(columns=['name', 'chapter', 'desc', 'url'])
+
+    if search:
+        my_df2 = my_df2[my_df2['name'].str.contains(search.lower())]
+
+    return my_df2
+
+# -------------------------------------------------------------
+def _load_data_remote(key: str, local: str | None = None) -> Optional[DataFrame]:
+    """Loads the dataset specified by the key.
+
+    Args:
+        key (str): Data identifier defined in the metadata (file name or alias)
+        local (str, optional): Local metadata path. Uses the remote BASE_URL when None.
+
+    Returns:
+        DataFrame | None: DataFrame on success, None on failure
+
+    Examples:
+        ```python
+        from hossam import *
+        df = load_data('AD_SALES')  # the key must exist in the metadata
+        ```
+    """
+    index = None
+    try:
+        url, desc, index = __get_data_url(key, local=local)
+    except Exception as e:
+        try:
+            print(f"\033[91m{str(e)}\033[0m")
+        except Exception:
+            print(e)
+        return
+
+    #print("\033[94m[data]\033[0m", url.replace("\\", "/"))
+    #print("\033[94m[desc]\033[0m", desc)
+    print(f"\033[94m{desc}\033[0m")
+
+    df = None
+
+    try:
+        df = __get_df(url, index_col=index)
+    except Exception as e:
+        try:
+            print(f"\033[91m{str(e)}\033[0m")
+        except Exception:
+            print(e)
+        return
+
+
+    return df
 
 # ===================================================================
 # Return the list of installed Python packages
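
The remote `.xlsx` branch of the new `__get_df` downloads the workbook once and wraps the same bytes in a fresh `BytesIO` for each `read_excel` call; reusing one stream would leave the read pointer at EOF after the first parse. The pattern in isolation (the URL is hypothetical):

```python
from io import BytesIO
import requests
from pandas import read_excel

url = "https://data.hossam.kr/example.xlsx"  # hypothetical dataset URL
r = requests.get(url)
r.raise_for_status()
data_bytes = r.content  # fetched exactly once

# Each parse gets its own BytesIO; a shared stream would be exhausted after the first read.
df = read_excel(BytesIO(data_bytes))
try:
    meta = read_excel(BytesIO(data_bytes), sheet_name="metadata", index_col=0)
except Exception:
    meta = None  # the workbook may not ship a metadata sheet
```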
hossam-0.4.6.dist-info/RECORD
ADDED
@@ -0,0 +1,15 @@
+hossam/NotoSansKR-Regular.ttf,sha256=0SCufUQwcVWrWTu75j4Lt_V2bgBJIBXl1p8iAJJYkVY,6185516
+hossam/__init__.py,sha256=REsuVeTU3Thks1Uk2mRxtnW2yLf01uPP4rkeDjBY468,2749
+hossam/hs_classroom.py,sha256=oNRnHPXOu0-YqtPY7EJeS1qteH0CtKxNk5Lt7opti_w,27523
+hossam/hs_gis.py,sha256=DVmndBK-_7GMK3J1_on3ieEQk1S0MfUZ8_wlX-cDdZQ,11581
+hossam/hs_plot.py,sha256=83B7fjEDaXnpwg8GhDGsVX6lAd81rYqoqvMGzovn3qc,85900
+hossam/hs_prep.py,sha256=ypuX97mCxpo7CLoI_S79bUw7th0ok5LCZjt4vzRaGiI,38326
+hossam/hs_stats.py,sha256=wPml2m22jJOHBH6RRwqTkYCZfh76OGiGfouvidoti48,118904
+hossam/hs_timeserise.py,sha256=XB8DKJBFb-892ACNCATcyBliSJVtbn-dpzfKi-grRAo,43148
+hossam/hs_util.py,sha256=i5thXDt4VVWbju3y6Q7PAdEay62b-5PJNX9TjQhFZCM,14663
+hossam/leekh.png,sha256=1PB5NQ24SDoHA5KMiBBsWpSa3iniFcwFTuGwuOsTHfI,6395
+hossam-0.4.6.dist-info/licenses/LICENSE,sha256=nIqzhlcFY_2D6QtFsYjwU7BWkafo-rUJOQpDZ-DsauI,941
+hossam-0.4.6.dist-info/METADATA,sha256=WhQf4TX3ZRgifqFsGg9yRS4xpRj_H4bolrasZImHMAg,3676
+hossam-0.4.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hossam-0.4.6.dist-info/top_level.txt,sha256=_-7bwjhthHplWhywEaHIJX2yL11CQCaLjCNSBlk6wiQ,7
+hossam-0.4.6.dist-info/RECORD,,
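
Each RECORD entry is `path,sha256=<digest>,size`, where the digest is the urlsafe-base64 SHA-256 of the file with padding stripped (per the wheel spec). A small sketch that reproduces the token for any file:

```python
import base64
import hashlib
from pathlib import Path

def record_digest(path: str) -> str:
    """Compute the sha256=... token used in wheel RECORD files."""
    digest = hashlib.sha256(Path(path).read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# e.g. for an installed copy of this wheel:
# record_digest("hossam/hs_util.py") == "sha256=i5thXDt4VVWbju3y6Q7PAdEay62b-5PJNX9TjQhFZCM"
print(record_digest(__file__))
```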