hossam 0.3.20__py3-none-any.whl → 0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hossam/__init__.py +19 -22
- hossam/data_loader.py +16 -10
- hossam/hs_classroom.py +48 -38
- hossam/hs_gis.py +10 -6
- hossam/hs_plot.py +153 -150
- hossam/hs_prep.py +95 -85
- hossam/hs_stats.py +426 -548
- hossam/hs_timeserise.py +161 -152
- hossam/hs_util.py +44 -17
- {hossam-0.3.20.dist-info → hossam-0.4.dist-info}/METADATA +6 -107
- hossam-0.4.dist-info/RECORD +16 -0
- hossam/mcp/__init__.py +0 -12
- hossam/mcp/hs_classroom.py +0 -22
- hossam/mcp/hs_gis.py +0 -30
- hossam/mcp/hs_plot.py +0 -53
- hossam/mcp/hs_prep.py +0 -61
- hossam/mcp/hs_stats.py +0 -25
- hossam/mcp/hs_timeserise.py +0 -22
- hossam/mcp/hs_util.py +0 -30
- hossam/mcp/loader.py +0 -29
- hossam/mcp/server.py +0 -675
- hossam-0.3.20.dist-info/RECORD +0 -27
- hossam-0.3.20.dist-info/entry_points.txt +0 -2
- {hossam-0.3.20.dist-info → hossam-0.4.dist-info}/WHEEL +0 -0
- {hossam-0.3.20.dist-info → hossam-0.4.dist-info}/licenses/LICENSE +0 -0
- {hossam-0.3.20.dist-info → hossam-0.4.dist-info}/top_level.txt +0 -0
hossam/hs_timeserise.py
CHANGED
|
@@ -70,21 +70,21 @@ def diff(
|
|
|
70
70
|
- 각 반복마다 ADF 검정 통계량, p-value, 기각값을 출력한다.
|
|
71
71
|
|
|
72
72
|
Examples:
|
|
73
|
-
|
|
73
|
+
```python
|
|
74
|
+
from hossam import *
|
|
75
|
+
from pandas import DataFrame, date_range
|
|
74
76
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
>>> stationary_df = diff(df, 'value')
|
|
77
|
+
# 기본 사용 (정상성 만족까지 자동 차분):
|
|
78
|
+
df = DataFrame({'value': [100, 102, 105, 110, 120]},
|
|
79
|
+
index=date_range('2020-01', periods=5, freq='M'))
|
|
80
|
+
stationary_df = hs_timeseries.diff(df, 'value')
|
|
80
81
|
|
|
81
|
-
최대 2차 차분으로 제한:
|
|
82
|
+
# 최대 2차 차분으로 제한:
|
|
83
|
+
stationary_df = hs_timeseries.diff(df, 'value', max_diff=2)
|
|
82
84
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
>>> stationary_df = diff(df, 'value', plot=False)
|
|
85
|
+
# 그래프 없이 실행:
|
|
86
|
+
stationary_df = hs_timeseries.diff(df, 'value', plot=False)
|
|
87
|
+
```
|
|
88
88
|
"""
|
|
89
89
|
df = data.copy()
|
|
90
90
|
|
|
@@ -170,17 +170,18 @@ def rolling(
|
|
|
170
170
|
- 계절성 파악을 위해서는 계절 주기와 동일한 윈도우 사용을 권장한다.
|
|
171
171
|
|
|
172
172
|
Examples:
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
173
|
+
```python
|
|
174
|
+
from hossam import *
|
|
175
|
+
from pandas import Series, date_range
|
|
176
|
+
|
|
177
|
+
# 7일 이동평균 계산:
|
|
178
|
+
data = Series([10, 12, 13, 15, 14, 16, 18],
|
|
179
|
+
index=date_range('2020-01-01', periods=7))
|
|
180
|
+
ma7 = hs_timeseries.rolling(data, window=7)
|
|
181
|
+
|
|
182
|
+
# 30일 이동평균, 그래프 없이:
|
|
183
|
+
ma30 = hs_timeseries.rolling(data, window=30, plot=False)
|
|
184
|
+
```
|
|
184
185
|
"""
|
|
185
186
|
rolling = data.rolling(window=window).mean()
|
|
186
187
|
|
|
@@ -229,17 +230,18 @@ def ewm(
|
|
|
229
230
|
- α = 2/(span+1) 공식으로 smoothing factor가 결정된다.
|
|
230
231
|
|
|
231
232
|
Examples:
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
233
|
+
```python
|
|
234
|
+
from hossam import *
|
|
235
|
+
from pandas import Series, date_range
|
|
236
|
+
|
|
237
|
+
# 12기간 지수가중이동평균:
|
|
238
|
+
data = Series([10, 12, 13, 15, 14, 16, 18],
|
|
239
|
+
index=date_range('2020-01-01', periods=7))
|
|
240
|
+
ewma = hs_timeseries.ewm(data, span=12)
|
|
241
|
+
|
|
242
|
+
# 단기 추세 파악 (span=5):
|
|
243
|
+
ewma_short = hs_timeseries.ewm(data, span=5, plot=False)
|
|
244
|
+
```
|
|
243
245
|
"""
|
|
244
246
|
ewm = data.ewm(span=span).mean()
|
|
245
247
|
|
|
@@ -304,18 +306,19 @@ def seasonal_decompose(
|
|
|
304
306
|
- 주기(period)는 데이터의 빈도(frequency)에서 자동 추론된다.
|
|
305
307
|
|
|
306
308
|
Examples:
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
309
|
+
```python
|
|
310
|
+
from hossam import *
|
|
311
|
+
from pandas import Series, date_range
|
|
312
|
+
|
|
313
|
+
# 월별 데이터 가법 분해:
|
|
314
|
+
data = Series([100, 120, 110, 130, 150, 140],
|
|
315
|
+
index=date_range('2020-01', periods=6, freq='M'))
|
|
316
|
+
components = hs_timeseries.seasonal_decompose(data, model='additive')
|
|
317
|
+
|
|
318
|
+
# 승법 모델 사용:
|
|
319
|
+
components = hs_timeseries.seasonal_decompose(data, model='multiplicative', plot=False)
|
|
320
|
+
print(components[['trend', 'seasonal']].head())
|
|
321
|
+
```
|
|
319
322
|
"""
|
|
320
323
|
if model not in ["additive", "multiplicative"]:
|
|
321
324
|
raise ValueError("model은 'additive' 또는 'multiplicative'이어야 합니다.")
|
|
@@ -354,7 +357,7 @@ def seasonal_decompose(
|
|
|
354
357
|
# ===================================================================
|
|
355
358
|
# 시계열 데이터에 대한 학습/테스트 데이터 분할
|
|
356
359
|
# ===================================================================
|
|
357
|
-
def
|
|
360
|
+
def train_test_split(data: DataFrame, test_size: float = 0.2) -> tuple:
|
|
358
361
|
"""시계열 데이터를 시간 순서를 유지하며 학습/테스트 세트로 분할한다.
|
|
359
362
|
|
|
360
363
|
일반적인 random split과 달리 시간 순서를 엄격히 유지하여 분할한다.
|
|
@@ -379,19 +382,19 @@ def timeseries_split(data: DataFrame, test_size: float = 0.2) -> tuple:
|
|
|
379
382
|
- 일반적으로 test_size는 0.1~0.3 범위를 사용한다.
|
|
380
383
|
|
|
381
384
|
Examples:
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
70:30 분할:
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
385
|
+
```python
|
|
386
|
+
from hossam import *
|
|
387
|
+
from pandas import DataFrame, date_range
|
|
388
|
+
|
|
389
|
+
# 80:20 분할 (기본):
|
|
390
|
+
df = DataFrame({'value': range(100)}, index=date_range('2020-01-01', periods=100))
|
|
391
|
+
train, test = hs_timeseries.train_test_split(df)
|
|
392
|
+
print(len(train), len(test)) # 80, 20
|
|
393
|
+
|
|
394
|
+
# 70:30 분할:
|
|
395
|
+
train, test = hs_timeseries.train_test_split(df, test_size=0.3)
|
|
396
|
+
print(len(train), len(test)) # 70, 30
|
|
397
|
+
```
|
|
395
398
|
"""
|
|
396
399
|
train_size = 1 - test_size
|
|
397
400
|
|
|
@@ -430,17 +433,18 @@ def acf_plot(
|
|
|
430
433
|
- 계절성이 있으면 계절 주기마다 높은 ACF 값이 나타난다.
|
|
431
434
|
|
|
432
435
|
Examples:
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
436
|
+
```python
|
|
437
|
+
from hossam import *
|
|
438
|
+
from pandas import Series, date_range
|
|
439
|
+
|
|
440
|
+
# 기본 ACF 플롯:
|
|
441
|
+
data = Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
|
|
442
|
+
index=date_range('2020-01-01', periods=10))
|
|
443
|
+
hs_timeseries.acf_plot(data)
|
|
444
|
+
|
|
445
|
+
# 콜백으로 제목 추가:
|
|
446
|
+
hs_timeseries.acf_plot(data, callback=lambda ax: ax.set_title('My ACF Plot'))
|
|
447
|
+
```
|
|
444
448
|
"""
|
|
445
449
|
fig = plt.figure(figsize=figsize, dpi=dpi)
|
|
446
450
|
ax = fig.gca()
|
|
@@ -481,17 +485,18 @@ def pacf_plot(
|
|
|
481
485
|
- 파란색 영역(신뢰구간)을 벗어나는 lag가 AR 항의 개수를 나타낸다.
|
|
482
486
|
|
|
483
487
|
Examples:
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
488
|
+
```python
|
|
489
|
+
from hossam import *
|
|
490
|
+
from pandas import Series, date_range
|
|
491
|
+
|
|
492
|
+
# 기본 PACF 플롯:
|
|
493
|
+
data = Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
|
|
494
|
+
index=date_range('2020-01-01', periods=10))
|
|
495
|
+
hs_timeseries.pacf_plot(data)
|
|
496
|
+
|
|
497
|
+
# 콜백으로 커스터마이징:
|
|
498
|
+
hs_timeseries.pacf_plot(data, callback=lambda ax: ax.set_ylabel('Partial Correlation'))
|
|
499
|
+
```
|
|
495
500
|
"""
|
|
496
501
|
fig = plt.figure(figsize=figsize, dpi=dpi)
|
|
497
502
|
ax = fig.gca()
|
|
@@ -535,16 +540,17 @@ def acf_pacf_plot(
|
|
|
535
540
|
실전에서는 auto_arima를 사용한 자동 선택도 권장된다.
|
|
536
541
|
|
|
537
542
|
Examples:
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
543
|
+
```python
|
|
544
|
+
from hossam import *
|
|
545
|
+
from pandas import Series, date_range
|
|
546
|
+
|
|
547
|
+
# ARIMA 모델링 전 차수 탐색:
|
|
548
|
+
data = Series([10, 12, 13, 15, 14, 16, 18, 20],
|
|
549
|
+
index=date_range('2020-01-01', periods=8))
|
|
550
|
+
|
|
551
|
+
# 1차 차분 후 ACF/PACF 플롯:
|
|
552
|
+
stationary = hs_timeseries.diff(data.to_frame('value'), 'value')
|
|
553
|
+
hs_timeseries.acf_pacf_plot(stationary)
|
|
548
554
|
"""
|
|
549
555
|
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(figsize[0], figsize[1] * 2), dpi=dpi)
|
|
550
556
|
|
|
@@ -615,22 +621,22 @@ def arima(
|
|
|
615
621
|
- auto=True는 시간이 오래 걸릴 수 있으나 최적 모델을 찾아줌
|
|
616
622
|
|
|
617
623
|
Examples:
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
계절성 모델 SARIMA(1,1,1)(1,1,1,12):
|
|
632
|
-
|
|
633
|
-
|
|
624
|
+
```python
|
|
625
|
+
from hossam import *
|
|
626
|
+
from pandas import Series, date_range
|
|
627
|
+
|
|
628
|
+
# 수동으로 ARIMA(2,1,2) 모델 생성:
|
|
629
|
+
data = Series([100, 102, 105, 110, 115, 120, 125, 130],
|
|
630
|
+
index=date_range('2020-01', periods=8, freq='M'))
|
|
631
|
+
train, test = hs_timeseries.train_test_split(data, test_size=0.25)
|
|
632
|
+
model = hs_timeseries.arima(train, test, p=2, d=1, q=2)
|
|
633
|
+
|
|
634
|
+
# auto_arima로 최적 모델 탐색:
|
|
635
|
+
model = hs_timeseries.arima(train, test, auto=True)
|
|
636
|
+
|
|
637
|
+
# 계절성 모델 SARIMA(1,1,1)(1,1,1,12):
|
|
638
|
+
model = hs_timeseries.arima(train, test, p=1, d=1, q=1, s=12)
|
|
639
|
+
```
|
|
634
640
|
"""
|
|
635
641
|
model = None
|
|
636
642
|
|
|
@@ -821,30 +827,32 @@ def prophet(
|
|
|
821
827
|
- 외부 회귀변수는 callback에서 add_regressor()로 추가 가능.
|
|
822
828
|
|
|
823
829
|
Examples:
|
|
824
|
-
|
|
830
|
+
```python
|
|
831
|
+
from hossam import *
|
|
832
|
+
from pandas import DataFrame, date_range
|
|
825
833
|
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
>>> model, params, score, forecast, pred = hs_prophet(train)
|
|
834
|
+
# 기본 사용 (단일 모델):
|
|
835
|
+
train = DataFrame({
|
|
836
|
+
'ds': date_range('2020-01-01', periods=100),
|
|
837
|
+
'y': range(100)
|
|
838
|
+
})
|
|
839
|
+
model, params, score, forecast, pred = hs_timeseries.prophet(train)
|
|
833
840
|
|
|
834
841
|
# 하이퍼파라미터 그리드 서치:
|
|
835
842
|
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
843
|
+
model, params, score, forecast, pred = hs_timeseries.prophet(
|
|
844
|
+
train,
|
|
845
|
+
changepoint_prior_scale=[0.001, 0.01, 0.1],
|
|
846
|
+
seasonality_prior_scale=[0.01, 0.1, 1.0],
|
|
847
|
+
seasonality_mode=['additive', 'multiplicative']
|
|
848
|
+
)
|
|
842
849
|
|
|
843
|
-
휴일 효과 추가:
|
|
850
|
+
# 휴일 효과 추가:
|
|
851
|
+
def add_holidays(m):
|
|
852
|
+
m.add_country_holidays(country_name='KR')
|
|
844
853
|
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
>>> model, _, _, _, _ = hs_prophet(train, callback=add_holidays)
|
|
854
|
+
model, _, _, _, _ = hs_timeseries.prophet(train, callback=add_holidays)
|
|
855
|
+
```
|
|
848
856
|
"""
|
|
849
857
|
|
|
850
858
|
# logger = logging.getLogger("cmdstanpy")
|
|
@@ -963,19 +971,20 @@ def prophet_report(
|
|
|
963
971
|
- 변화점은 모델이 추세 변화를 감지한 시점을 수직선으로 표시
|
|
964
972
|
|
|
965
973
|
Examples:
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
>>> model, _, _, forecast, pred = hs_prophet(train)
|
|
970
|
-
>>> hs_prophet_report(model, forecast, pred)
|
|
974
|
+
```python
|
|
975
|
+
from hossam import *
|
|
976
|
+
from pandas import DataFrame, date_range
|
|
971
977
|
|
|
972
|
-
|
|
978
|
+
# 기본 리포트 출력:
|
|
979
|
+
model, _, _, forecast, pred = hs_timeseries.prophet(train)
|
|
980
|
+
hs_timeseries.prophet_report(model, forecast, pred)
|
|
973
981
|
|
|
974
|
-
|
|
982
|
+
# test 데이터와 함께 성능 평가:
|
|
983
|
+
hs_timeseries.prophet_report(model, forecast, pred, test=test)
|
|
975
984
|
|
|
976
|
-
예측 테이블 출력:
|
|
977
|
-
|
|
978
|
-
|
|
985
|
+
# 예측 테이블 출력:
|
|
986
|
+
hs_timeseries.prophet_report(model, forecast, pred, print_forecast=True)
|
|
987
|
+
```
|
|
979
988
|
"""
|
|
980
989
|
|
|
981
990
|
# ------------------------------------------------------
|
|
@@ -1068,22 +1077,22 @@ def get_weekend_df(start: any, end: any = None) -> DataFrame:
|
|
|
1068
1077
|
- 토요일(Saturday), 일요일(Sunday)을 자동 탐지하여 추출한다.
|
|
1069
1078
|
|
|
1070
1079
|
Examples:
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1080
|
+
```python
|
|
1081
|
+
from hossam import *
|
|
1082
|
+
from pandas import DataFrame, date_range
|
|
1083
|
+
|
|
1084
|
+
# 2020년 전체 주말 생성:
|
|
1085
|
+
weekends = hs_timeseries.get_weekend_df('2020-01-01', '2020-12-31')
|
|
1086
|
+
print(len(weekends)) # 104 (52주 × 2일)
|
|
1087
|
+
|
|
1088
|
+
# 현재까지의 주말:
|
|
1089
|
+
weekends = hs_timeseries.get_weekend_df('2023-01-01')
|
|
1090
|
+
print(weekends.head())
|
|
1091
|
+
|
|
1092
|
+
# Prophet 모델에 주말 효과 추가:
|
|
1093
|
+
weekends = hs_timeseries.get_weekend_df('2020-01-01', '2025-12-31')
|
|
1094
|
+
model = hs_timeseries.prophet(train, holidays=weekends)
|
|
1095
|
+
```
|
|
1087
1096
|
"""
|
|
1088
1097
|
if end is None:
|
|
1089
1098
|
end = dt.datetime.now()
|
hossam/hs_util.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
# -------------------------------------------------------------
|
|
3
3
|
from typing import TYPE_CHECKING
|
|
4
|
-
|
|
4
|
+
from importlib.metadata import distributions
|
|
5
|
+
import pandas as pd
|
|
5
6
|
import numpy as np
|
|
6
7
|
from pandas import DataFrame, DatetimeIndex, read_csv, read_excel
|
|
7
8
|
from scipy.stats import normaltest
|
|
@@ -9,6 +10,25 @@ from tabulate import tabulate
|
|
|
9
10
|
|
|
10
11
|
from .data_loader import load_data as _load_data_remote
|
|
11
12
|
|
|
13
|
+
# ===================================================================
|
|
14
|
+
# 설치된 파이썬 패키지 목록 반환
|
|
15
|
+
# ===================================================================
|
|
16
|
+
def my_packages():
|
|
17
|
+
"""
|
|
18
|
+
현재 파이썬 인터프리터에 설치된 모든 패키지의 이름과 버전을
|
|
19
|
+
패키지 이름순으로 정렬하여 pandas DataFrame으로 반환합니다.
|
|
20
|
+
Returns:
|
|
21
|
+
pd.DataFrame: columns=['name', 'version', 'summary']
|
|
22
|
+
"""
|
|
23
|
+
pkgs = []
|
|
24
|
+
for dist in distributions():
|
|
25
|
+
name = dist.metadata['Name'] if 'Name' in dist.metadata else dist.name
|
|
26
|
+
version = dist.version
|
|
27
|
+
summary = dist.metadata.get('Summary', '')
|
|
28
|
+
pkgs.append((name, version, summary))
|
|
29
|
+
pkgs = sorted(pkgs, key=lambda x: x[0].lower())
|
|
30
|
+
return pd.DataFrame(pkgs, columns=['name', 'version', 'summary'])
|
|
31
|
+
|
|
12
32
|
# ===================================================================
|
|
13
33
|
# 정규분포 데이터 생성
|
|
14
34
|
# ===================================================================
|
|
@@ -27,10 +47,10 @@ def make_normalize_values(
|
|
|
27
47
|
np.ndarray: 정규분포를 따르는 데이터
|
|
28
48
|
|
|
29
49
|
Examples:
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
50
|
+
```python
|
|
51
|
+
from hossam import *
|
|
52
|
+
x = hs_util.make_normalize_values(mean=0.0, std=1.0, size=100)
|
|
53
|
+
```
|
|
34
54
|
"""
|
|
35
55
|
p = 0.0
|
|
36
56
|
x: np.ndarray = np.array([])
|
|
@@ -95,9 +115,11 @@ def pretty_table(data: DataFrame, tablefmt="simple", headers: str = "keys") -> N
|
|
|
95
115
|
None
|
|
96
116
|
|
|
97
117
|
Examples:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
118
|
+
```python
|
|
119
|
+
from hossam import *
|
|
120
|
+
from pandas import DataFrame
|
|
121
|
+
hs_util.pretty_table(DataFrame({"a":[1,2],"b":[3,4]}))
|
|
122
|
+
```
|
|
101
123
|
"""
|
|
102
124
|
|
|
103
125
|
tabulate.WIDE_CHARS_MODE = False
|
|
@@ -113,10 +135,10 @@ def pretty_table(data: DataFrame, tablefmt="simple", headers: str = "keys") -> N
|
|
|
113
135
|
# ===================================================================
|
|
114
136
|
def __data_info(
|
|
115
137
|
origin: DataFrame,
|
|
116
|
-
index_col: str = None,
|
|
138
|
+
index_col: str | None = None,
|
|
117
139
|
timeindex: bool = False,
|
|
118
140
|
info: bool = True,
|
|
119
|
-
categories: list = None,
|
|
141
|
+
categories: list | None = None,
|
|
120
142
|
) -> DataFrame:
|
|
121
143
|
"""데이터 프레임을 통해 필요한 초기 작업을 수행한다.
|
|
122
144
|
|
|
@@ -173,11 +195,11 @@ def __data_info(
|
|
|
173
195
|
# 데이터 로드
|
|
174
196
|
# ===================================================================
|
|
175
197
|
def load_data(key: str,
|
|
176
|
-
index_col: str = None,
|
|
198
|
+
index_col: str | None = None,
|
|
177
199
|
timeindex: bool = False,
|
|
178
200
|
info: bool = True,
|
|
179
|
-
categories: list = None,
|
|
180
|
-
local: str = None) -> DataFrame:
|
|
201
|
+
categories: list | None = None,
|
|
202
|
+
local: str | None = None) -> DataFrame:
|
|
181
203
|
"""데이터 키를 통해 데이터를 로드한 뒤 기본 전처리/출력을 수행한다.
|
|
182
204
|
|
|
183
205
|
Args:
|
|
@@ -192,14 +214,16 @@ def load_data(key: str,
|
|
|
192
214
|
DataFrame: 전처리(인덱스 설정, 카테고리 변환)가 완료된 데이터프레임
|
|
193
215
|
|
|
194
216
|
Examples:
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
217
|
+
```python
|
|
218
|
+
from hossam import *
|
|
219
|
+
df = hs_util.load_data("AD_SALES", index_col=None, timeindex=False, info=False)
|
|
220
|
+
```
|
|
199
221
|
"""
|
|
200
222
|
|
|
201
223
|
k = key.lower()
|
|
202
224
|
|
|
225
|
+
origin = None
|
|
226
|
+
|
|
203
227
|
if k.endswith(".xlsx"):
|
|
204
228
|
origin = read_excel(key)
|
|
205
229
|
elif k.endswith(".csv"):
|
|
@@ -207,4 +231,7 @@ def load_data(key: str,
|
|
|
207
231
|
else:
|
|
208
232
|
origin = _load_data_remote(key, local)
|
|
209
233
|
|
|
234
|
+
if origin is None:
|
|
235
|
+
raise RuntimeError("Data loading failed: origin is None")
|
|
236
|
+
|
|
210
237
|
return __data_info(origin, index_col, timeindex, info, categories)
|