hossam 0.3.19__py3-none-any.whl → 0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hossam/hs_timeserise.py CHANGED
@@ -70,21 +70,21 @@ def diff(
  - At each iteration, the ADF test statistic, p-value, and critical values are printed.

  Examples:
- Basic usage (automatic differencing until stationarity is reached):
+ ```python
+ from hossam import *
+ from pandas import DataFrame, date_range

- >>> from hossam import diff
- >>> import pandas as pd
- >>> df = pd.DataFrame({'value': [100, 102, 105, 110, 120]},
- ... index=pd.date_range('2020-01', periods=5, freq='M'))
- >>> stationary_df = diff(df, 'value')
+ # Basic usage (automatic differencing until stationarity is reached):
+ df = DataFrame({'value': [100, 102, 105, 110, 120]},
+ index=date_range('2020-01', periods=5, freq='M'))
+ stationary_df = hs_timeseries.diff(df, 'value')

- Limit to at most second-order differencing:
+ # Limit to at most second-order differencing:
+ stationary_df = hs_timeseries.diff(df, 'value', max_diff=2)

- >>> stationary_df = diff(df, 'value', max_diff=2)
-
- Run without plotting:
-
- >>> stationary_df = diff(df, 'value', plot=False)
+ # Run without plotting:
+ stationary_df = hs_timeseries.diff(df, 'value', plot=False)
+ ```
  """
  df = data.copy()

@@ -170,17 +170,18 @@ def rolling(
  - To capture seasonality, using a window equal to the seasonal period is recommended.

  Examples:
- Compute a 7-day moving average:
-
- >>> from hossam import rolling
- >>> import pandas as pd
- >>> data = pd.Series([10, 12, 13, 15, 14, 16, 18],
- ... index=pd.date_range('2020-01-01', periods=7))
- >>> ma7 = hs_rolling(data, window=7)
-
- 30-day moving average, without a plot:
-
- >>> ma30 = hs_rolling(data, window=30, plot=False)
+ ```python
+ from hossam import *
+ from pandas import Series, date_range
+
+ # Compute a 7-day moving average:
+ data = Series([10, 12, 13, 15, 14, 16, 18],
+ index=date_range('2020-01-01', periods=7))
+ ma7 = hs_timeseries.rolling(data, window=7)
+
+ # 30-day moving average, without a plot:
+ ma30 = hs_timeseries.rolling(data, window=30, plot=False)
+ ```
  """
  rolling = data.rolling(window=window).mean()

@@ -229,17 +230,18 @@ def ewm(
  - The smoothing factor is determined by the formula α = 2/(span+1).

  Examples:
- 12-period exponentially weighted moving average:
-
- >>> from hossam import ewm
- >>> import pandas as pd
- >>> data = pd.Series([10, 12, 13, 15, 14, 16, 18],
- ... index=pd.date_range('2020-01-01', periods=7))
- >>> ewma = hs_ewm(data, span=12)
-
- Capture a short-term trend (span=5):
-
- >>> ewma_short = hs_ewm(data, span=5, plot=False)
+ ```python
+ from hossam import ewm
+ from pandas import Series, date_range
+
+ # 12-period exponentially weighted moving average:
+ data = Series([10, 12, 13, 15, 14, 16, 18],
+ index=date_range('2020-01-01', periods=7))
+ ewma = hs_timeseries.ewm(data, span=12)
+
+ # Capture a short-term trend (span=5):
+ ewma_short = hs_timeseries.ewm(data, span=5, plot=False)
+ ```
  """
  ewm = data.ewm(span=span).mean()

@@ -304,18 +306,19 @@ def seasonal_decompose(
  - The period is inferred automatically from the data's frequency.

  Examples:
- Additive decomposition of monthly data:
-
- >>> from hossam import seasonal_decompose
- >>> import pandas as pd
- >>> data = pd.Series([100, 120, 110, 130, 150, 140],
- ... index=pd.date_range('2020-01', periods=6, freq='M'))
- >>> components = hs_seasonal_decompose(data, model='additive')
-
- Use the multiplicative model:
-
- >>> components = hs_seasonal_decompose(data, model='multiplicative', plot=False)
- >>> print(components[['trend', 'seasonal']].head())
+ ```python
+ from hossam import *
+ from pandas import Series, date_range
+
+ # Additive decomposition of monthly data:
+ data = Series([100, 120, 110, 130, 150, 140],
+ index=date_range('2020-01', periods=6, freq='M'))
+ components = hs_timeseries.seasonal_decompose(data, model='additive')
+
+ # Use the multiplicative model:
+ components = hs_timeseries.seasonal_decompose(data, model='multiplicative', plot=False)
+ print(components[['trend', 'seasonal']].head())
+ ```
  """
  if model not in ["additive", "multiplicative"]:
  raise ValueError("model은 'additive' 또는 'multiplicative'이어야 합니다.")
@@ -354,7 +357,7 @@ def seasonal_decompose(
  # ===================================================================
  # Train/test split for time series data
  # ===================================================================
- def timeseries_split(data: DataFrame, test_size: float = 0.2) -> tuple:
+ def train_test_split(data: DataFrame, test_size: float = 0.2) -> tuple:
  """Splits time series data into train/test sets while preserving temporal order.

  Unlike a typical random split, the data is split in strict time order.
@@ -379,19 +382,19 @@ def timeseries_split(data: DataFrame, test_size: float = 0.2) -> tuple:
  - Typically, a test_size in the 0.1-0.3 range is used.

  Examples:
- 80:20 split (default):
-
- >>> from hossam import timeseries_split
- >>> import pandas as pd
- >>> df = pd.DataFrame({'value': range(100)},
- ... index=pd.date_range('2020-01-01', periods=100))
- >>> train, test = hs_timeseries_split(df)
- >>> print(len(train), len(test)) # 80, 20
-
- 70:30 split:
-
- >>> train, test = hs_timeseries_split(df, test_size=0.3)
- >>> print(len(train), len(test)) # 70, 30
+ ```python
+ from hossam import *
+ from pandas import DataFrame, date_range
+
+ # 80:20 split (default):
+ df = DataFrame({'value': range(100)}, index=date_range('2020-01-01', periods=100))
+ train, test = hs_timeseries.train_test_split(df)
+ print(len(train), len(test)) # 80, 20
+
+ # 70:30 split:
+ train, test = hs_timeseries.train_test_split(df, test_size=0.3)
+ print(len(train), len(test)) # 70, 30
+ ```
  """
  train_size = 1 - test_size

@@ -430,17 +433,18 @@ def acf_plot(
  - If seasonality is present, high ACF values appear at each seasonal period.

  Examples:
- Basic ACF plot:
-
- >>> from hossam import acf_plot
- >>> import pandas as pd
- >>> data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- ... index=pd.date_range('2020-01-01', periods=10))
- >>> hs_acf_plot(data)
-
- Add a title via a callback:
-
- >>> hs_acf_plot(data, callback=lambda ax: ax.set_title('My ACF Plot'))
+ ```python
+ from hossam import *
+ from pandas import Series, date_range
+
+ # Basic ACF plot:
+ data = Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+ index=date_range('2020-01-01', periods=10))
+ hs_timeseries.acf_plot(data)
+
+ # Add a title via a callback:
+ hs_timeseries.acf_plot(data, callback=lambda ax: ax.set_title('My ACF Plot'))
+ ```
  """
  fig = plt.figure(figsize=figsize, dpi=dpi)
  ax = fig.gca()
@@ -481,17 +485,18 @@ def pacf_plot(
  - Lags that fall outside the blue region (confidence interval) indicate the number of AR terms.

  Examples:
- Basic PACF plot:
-
- >>> from hossam import pacf_plot
- >>> import pandas as pd
- >>> data = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
- ... index=pd.date_range('2020-01-01', periods=10))
- >>> hs_pacf_plot(data)
-
- Customize via a callback:
-
- >>> hs_pacf_plot(data, callback=lambda ax: ax.set_ylabel('Partial Correlation'))
+ ```python
+ from hossam import *
+ from pandas import Series, date_range
+
+ # Basic PACF plot:
+ data = Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+ index=date_range('2020-01-01', periods=10))
+ hs_timeseries.pacf_plot(data)
+
+ # Customize via a callback:
+ hs_timeseries.pacf_plot(data, callback=lambda ax: ax.set_ylabel('Partial Correlation'))
+ ```
  """
  fig = plt.figure(figsize=figsize, dpi=dpi)
  ax = fig.gca()
@@ -535,16 +540,17 @@ def acf_pacf_plot(
  In practice, automatic selection with auto_arima is also recommended.

  Examples:
- Order search before ARIMA modeling:
-
- >>> from hossam import acf_pacf_plot, hs_diff
- >>> import pandas as pd
- >>> data = pd.Series([10, 12, 13, 15, 14, 16, 18, 20],
- ... index=pd.date_range('2020-01-01', periods=8))
- >>> # Ensure stationarity after first-order differencing
- >>> stationary = diff(data, plot=False, max_diff=1)
- >>> # Determine p and q from the ACF/PACF
- >>> hs_acf_pacf_plot(stationary)
+ ```python
+ from hossam import *
+ from pandas import Series, date_range
+
+ # Order search for ARIMA modeling:
+ data = Series([10, 12, 13, 15, 14, 16, 18, 20],
+ index=date_range('2020-01-01', periods=8))
+
+ # ACF/PACF plot after first-order differencing:
+ stationary = hs_timeseries.diff(data, 'value')
+ hs_timeseries.acf_pacf_plot(stationary)
  """
  fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(figsize[0], figsize[1] * 2), dpi=dpi)

@@ -615,22 +621,22 @@ def arima(
  - auto=True can take a long time but finds the optimal model

  Examples:
- Manual ARIMA(2,1,2) model:
-
- >>> from hossam import arima, hs_timeseries_split
- >>> import pandas as pd
- >>> data = pd.Series([100, 102, 105, 110, 115, 120, 125, 130],
- ... index=pd.date_range('2020-01', periods=8, freq='M'))
- >>> train, test = hs_timeseries_split(data, test_size=0.25)
- >>> model = hs_arima(train, test, p=2, d=1, q=2)
-
- Search for the optimal model with auto_arima:
-
- >>> model = hs_arima(train, test, auto=True)
-
- Seasonal model SARIMA(1,1,1)(1,1,1,12):
-
- >>> model = hs_arima(train, test, p=1, d=1, q=1, s=12)
+ ```python
+ from hossam import *
+ from pandas import Series, date_range
+
+ # Build an ARIMA(2,1,2) model manually:
+ data = Series([100, 102, 105, 110, 115, 120, 125, 130],
+ index=date_range('2020-01', periods=8, freq='M'))
+ train, test = hs_timeseries.train_test_split(data, test_size=0.25)
+ model = hs_timeseries.arima(train, test, p=2, d=1, q=2)
+
+ # Search for the optimal model with auto_arima:
+ model = hs_timeseries.arima(train, test, auto=True)
+
+ # Seasonal model SARIMA(1,1,1)(1,1,1,12):
+ model = hs_timeseries.arima(train, test, p=1, d=1, q=1, s=12)
+ ```
  """
  model = None

@@ -821,30 +827,32 @@ def prophet(
  - External regressors can be added with add_regressor() in the callback.

  Examples:
- Basic usage (single model):
+ ```python
+ from hossam import *
+ from pandas import DataFrame, date_range

- >>> from hossam import prophet
- >>> import pandas as pd
- >>> train = pd.DataFrame({
- ... 'ds': pd.date_range('2020-01-01', periods=100),
- ... 'y': range(100)
- ... })
- >>> model, params, score, forecast, pred = hs_prophet(train)
+ # Basic usage (single model):
+ train = DataFrame({
+ 'ds': date_range('2020-01-01', periods=100),
+ 'y': range(100)
+ })
+ model, params, score, forecast, pred = hs_timeseries.prophet(train)

  Hyperparameter grid search:

- >>> model, params, score, forecast, pred = hs_prophet(
- ... train,
- ... changepoint_prior_scale=[0.001, 0.01, 0.1],
- ... seasonality_prior_scale=[0.01, 0.1, 1.0],
- ... seasonality_mode=['additive', 'multiplicative']
- ... )
+ model, params, score, forecast, pred = hs_timeseries.prophet(
+ train,
+ changepoint_prior_scale=[0.001, 0.01, 0.1],
+ seasonality_prior_scale=[0.01, 0.1, 1.0],
+ seasonality_mode=['additive', 'multiplicative']
+ )

- Add holiday effects:
+ # Add holiday effects:
+ def add_holidays(m):
+ m.add_country_holidays(country_name='KR')

- >>> def add_holidays(m):
- ... m.add_country_holidays(country_name='KR')
- >>> model, _, _, _, _ = hs_prophet(train, callback=add_holidays)
+ model, _, _, _, _ = hs_timeseries.prophet(train, callback=add_holidays)
+ ```
  """

  # logger = logging.getLogger("cmdstanpy")
@@ -963,19 +971,20 @@ def prophet_report(
  - Changepoints are shown as vertical lines at the points where the model detected a trend change

  Examples:
- Print a basic report:
-
- >>> from hossam import prophet, hs_prophet_report
- >>> model, _, _, forecast, pred = hs_prophet(train)
- >>> hs_prophet_report(model, forecast, pred)
+ ```python
+ from hossam import *
+ from pandas import DataFrame, date_range

- Evaluate performance together with test data:
+ # Print a basic report:
+ model, _, _, forecast, pred = hs_timeseries.prophet(train)
+ hs_timeseries.prophet_report(model, forecast, pred)

- >>> hs_prophet_report(model, forecast, pred, test=test)
+ # Evaluate performance together with test data:
+ hs_timeseries.prophet_report(model, forecast, pred, test=test)

- Print the forecast table:
-
- >>> hs_prophet_report(model, forecast, pred, print_forecast=True)
+ # Print the forecast table:
+ hs_timeseries.prophet_report(model, forecast, pred, print_forecast=True)
+ ```
  """

  # ------------------------------------------------------
@@ -1068,22 +1077,22 @@ def get_weekend_df(start: any, end: any = None) -> DataFrame:
  - Saturdays and Sundays are detected and extracted automatically.

  Examples:
- Generate all weekends of 2020:
-
- >>> from hossam import get_weekend_df
- >>> weekends = get_weekend_df('2020-01-01', '2020-12-31')
- >>> print(len(weekends)) # 104 (52 weeks × 2 days)
-
- Weekends up to the present:
-
- >>> weekends = get_weekend_df('2023-01-01')
-
- Add weekend effects to a Prophet model:
-
- >>> from prophet import Prophet
- >>> weekends = get_weekend_df('2020-01-01', '2025-12-31')
- >>> model = Prophet(holidays=weekends)
- >>> model.fit(train)
+ ```python
+ from hossam import *
+ from pandas import DataFrame, date_range
+
+ # Generate all weekends of 2020:
+ weekends = hs_timeseries.get_weekend_df('2020-01-01', '2020-12-31')
+ print(len(weekends)) # 104 (52 weeks × 2 days)
+
+ # Weekends up to the present:
+ weekends = hs_timeseries.get_weekend_df('2023-01-01')
+ print(weekends.head())
+
+ # Add weekend effects to a Prophet model:
+ weekends = hs_timeseries.get_weekend_df('2020-01-01', '2025-12-31')
+ model = hs_timeseries.prophet(train, holidays=weekends)
+ ```
  """
  if end is None:
  end = dt.datetime.now()
hossam/hs_util.py CHANGED
@@ -1,7 +1,8 @@
  # -*- coding: utf-8 -*-
  # -------------------------------------------------------------
  from typing import TYPE_CHECKING
-
+ from importlib.metadata import distributions
+ import pandas as pd
  import numpy as np
  from pandas import DataFrame, DatetimeIndex, read_csv, read_excel
  from scipy.stats import normaltest
@@ -9,6 +10,25 @@ from tabulate import tabulate

  from .data_loader import load_data as _load_data_remote

+ # ===================================================================
+ # Return the list of installed Python packages
+ # ===================================================================
+ def my_packages():
+ """
+ Returns the name and version of every package installed in the current
+ Python interpreter as a pandas DataFrame, sorted by package name.
+ Returns:
+ pd.DataFrame: columns=['name', 'version']
+ """
+ pkgs = []
+ for dist in distributions():
+ name = dist.metadata['Name'] if 'Name' in dist.metadata else dist.name
+ version = dist.version
+ summary = dist.metadata.get('Summary', '')
+ pkgs.append((name, version, summary))
+ pkgs = sorted(pkgs, key=lambda x: x[0].lower())
+ return pd.DataFrame(pkgs, columns=['name', 'version', 'summary'])
+
  # ===================================================================
  # Generate normally distributed data
  # ===================================================================
@@ -27,10 +47,10 @@ def make_normalize_values(
  np.ndarray: data following a normal distribution

  Examples:
- >>> from hossam.util import make_normalize_values
- >>> x = make_normalize_values(mean=0.0, std=1.0, size=100)
- >>> x.shape
- (100,)
+ ```python
+ from hossam import *
+ x = hs.util.make_normalize_values(mean=0.0, std=1.0, size=100)
+ ```
  """
  p = 0.0
  x: np.ndarray = np.array([])
@@ -95,9 +115,11 @@ def pretty_table(data: DataFrame, tablefmt="simple", headers: str = "keys") -> N
  None

  Examples:
- >>> from hossam.util import pretty_table
- >>> from pandas import DataFrame
- >>> pretty_table(DataFrame({"a":[1,2],"b":[3,4]}))
+ ```python
+ from hossam import *
+ from pandas import DataFrame
+ hs_util.pretty_table(DataFrame({"a":[1,2],"b":[3,4]}))
+ ```
  """

  tabulate.WIDE_CHARS_MODE = False
@@ -113,10 +135,10 @@ def pretty_table(data: DataFrame, tablefmt="simple", headers: str = "keys") -> N
  # ===================================================================
  def __data_info(
  origin: DataFrame,
- index_col: str = None,
+ index_col: str | None = None,
  timeindex: bool = False,
  info: bool = True,
- categories: list = None,
+ categories: list | None = None,
  ) -> DataFrame:
  """Performs the required initial processing on the data frame.

@@ -173,11 +195,11 @@ def __data_info(
  # Load data
  # ===================================================================
  def load_data(key: str,
- index_col: str = None,
+ index_col: str | None = None,
  timeindex: bool = False,
  info: bool = True,
- categories: list = None,
- local: str = None) -> DataFrame:
+ categories: list | None = None,
+ local: str | None = None) -> DataFrame:
  """Loads data by key, then performs basic preprocessing/output.

  Args:
@@ -192,14 +214,16 @@ def load_data(key: str,
  DataFrame: data frame with preprocessing (index setting, category conversion) applied

  Examples:
- >>> from hossam.util import load_data
- >>> df = load_data("AD_SALES", index_col=None, timeindex=False, info=False)
- >>> isinstance(df.columns, object)
- True
+ ```python
+ from hossam import *
+ df = hs_util.load_data("AD_SALES", index_col=None, timeindex=False, info=False)
+ ```
  """

  k = key.lower()

+ origin = None
+
  if k.endswith(".xlsx"):
  origin = read_excel(key)
  elif k.endswith(".csv"):
@@ -207,4 +231,7 @@ def load_data(key: str,
  else:
  origin = _load_data_remote(key, local)

+ if origin is None:
+ raise RuntimeError("Data loading failed: origin is None")
+
  return __data_info(origin, index_col, timeindex, info, categories)