analyser_hj3415 2.10.6__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,670 @@
+ """
+ Time Series Analysis
+ """
+ import os
+ import numpy as np
+ import yfinance as yf
+ from datetime import datetime, timedelta
+ import pandas as pd
+ from typing import Optional, Tuple
+ import plotly.graph_objs as go
+ from plotly.offline import plot
+ import matplotlib.pyplot as plt  # import Matplotlib explicitly
+ from sklearn.preprocessing import MinMaxScaler
+ from tensorflow.keras.models import Sequential
+ from tensorflow.keras.layers import LSTM, Dense, Dropout
+ from tensorflow.keras.callbacks import EarlyStopping
+ from tensorflow.keras import Input
+ from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+ from dataclasses import dataclass
+
+ from utils_hj3415 import tools, setup_logger
+ from db_hj3415 import myredis
+ from analyser_hj3415.analyser import score
+
+
+ mylogger = setup_logger(__name__, 'WARNING')
+ expire_time = tools.to_int(os.getenv('DEFAULT_EXPIRE_TIME_H', 48)) * 3600
+
+
+ @dataclass
+ class LSTMData:
+     code: str
+
+     data_2d: np.ndarray
+     train_size: int
+     train_data_2d: np.ndarray
+     test_data_2d: np.ndarray
+
+     X_train_3d: np.ndarray
+     X_test_3d: np.ndarray
+     y_train_1d: np.ndarray
+     y_test_1d: np.ndarray
+
+
+ @dataclass
+ class LSTMGrade:
+     """
+     Data class holding the evaluation metrics of a trained deep learning model.
+     """
+     code: str
+     train_mse: float
+     train_mae: float
+     train_r2: float
+     test_mse: float
+     test_mae: float
+     test_r2: float
+
+
+ class MyLSTM:
+     """
+     LSTM (Long Short-Term Memory) price prediction model.
+     """
+     # How many days ahead to predict
+     future_days = 30
+
+     def __init__(self, code: str):
+         assert tools.is_6digit(code), f'Invalid value : {code}'
+         self._code = code
+         self.name = myredis.Corps(code, 'c101').get_name()
+         self.scaler = MinMaxScaler(feature_range=(0, 1))
+         self.raw_data = pd.DataFrame()
+         self.lstm_data = LSTMData(
+             code=self.code,
+             data_2d=np.array([]),
+             train_size=0,
+             train_data_2d=np.array([]),
+             test_data_2d=np.array([]),
+             X_train_3d=np.array([]),
+             X_test_3d=np.array([]),
+             y_train_1d=np.array([]),
+             y_test_1d=np.array([]),
+         )
+
+     @property
+     def code(self) -> str:
+         return self._code
+
+     @code.setter
+     def code(self, code: str):
+         assert tools.is_6digit(code), f'Invalid value : {code}'
+         mylogger.debug(f'change code : {self.code} -> {code}')
+
+         self._code = code
+         self.name = myredis.Corps(code, 'c101').get_name()
+         self.scaler = MinMaxScaler(feature_range=(0, 1))
+         self.raw_data = pd.DataFrame()
+         self.lstm_data = LSTMData(
+             code=self.code,
+             data_2d=np.array([]),
+             train_size=0,
+             train_data_2d=np.array([]),
+             test_data_2d=np.array([]),
+             X_train_3d=np.array([]),
+             X_test_3d=np.array([]),
+             y_train_1d=np.array([]),
+             y_test_1d=np.array([]),
+         )
+
+     def initializing(self):
+         """
+         Fetches stock price data for the last four years from Yahoo Finance and prepares
+         it for use in an LSTM model by normalizing, splitting into training and testing datasets,
+         and reshaping the data.
+         """
+         def get_raw_data() -> pd.DataFrame:
+             """
+             Downloads four years of raw daily price data for this ticker from Yahoo Finance.
+             :return: raw OHLCV DataFrame
+             """
+             # Today's date
+             today = datetime.today()
+
+             # Date four years ago (4 years = 365 days * 4)
+             four_years_ago = today - timedelta(days=365 * 4)
+             mylogger.info(
+                 f"Get raw data from yfinance - start: {four_years_ago.strftime('%Y-%m-%d')}, end: {today.strftime('%Y-%m-%d')}")
+
+             df = yf.download(
+                 self.code + '.KS',
+                 start=four_years_ago.strftime('%Y-%m-%d'),
+                 end=today.strftime('%Y-%m-%d')
+             )
+             df.index = df.index.tz_localize(None)
+             mylogger.debug(df)
+             return df
+
+         def preprocessing_for_lstm() -> LSTMData:
+             """
+             Prepares the data for the LSTM: normalization, an 8:2 train/test split,
+             and reshaping to the dimensions the model expects.
+             :return: populated LSTMData
+             """
+             mylogger.info("Preparing data for the LSTM (normalization, 8:2 train/test split, reshaping)")
+             # Keep only the closing price and convert it to a 2-D array
+             data_2d = self.raw_data['Close'].values.reshape(-1, 1)
+             mylogger.debug(f"Close prices as 2-D array[:5] : {data_2d[:5]}")
+
+             # Normalize the data (scale to the 0-1 range)
+             scaled_data_2d = self.scaler.fit_transform(data_2d)
+
+             # Build the training set:
+             # 80% of the price data is used for training, 20% for testing
+             train_size = int(len(scaled_data_2d) * 0.8)
+             train_data_2d = scaled_data_2d[:train_size]
+             test_data_2d = scaled_data_2d[train_size:]
+             mylogger.info(f'Total {len(data_2d)} data points, train size : {train_size}')
+
+             # Build the inputs (X) and targets (y) from the data
+             def create_dataset(data, time_step=60):
+                 X, y = [], []
+                 for i in range(len(data) - time_step):
+                     X.append(data[i:i + time_step, 0])
+                     y.append(data[i + time_step, 0])
+                 return np.array(X), np.array(y)
+
+             X_train, y_train_1d = create_dataset(train_data_2d)
+             X_test, y_test_1d = create_dataset(test_data_2d)
+             mylogger.debug(f"Training data shape (input, target) / {X_train.shape}")
+             mylogger.debug(f"Test data shape (input, target) / {X_test.shape}")
+
+             try:
+                 mylogger.debug("Reshaping the 2-D data into 3-D.")
+                 # Reshape the data into 3-D for the LSTM model input
+                 X_train_3d = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
+                 X_test_3d = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
+             except IndexError:
+                 return LSTMData(
+                     code=self.code,
+                     data_2d=np.array([]),
+                     train_size=0,
+                     train_data_2d=np.array([]),
+                     test_data_2d=np.array([]),
+                     X_train_3d=np.array([]),
+                     X_test_3d=np.array([]),
+                     y_train_1d=np.array([]),
+                     y_test_1d=np.array([]),
+                 )
+
+             mylogger.debug(
+                 f'ndim - X_train_3d : {X_train_3d.ndim}, X_test_3d : {X_test_3d.ndim}, y_train : {y_train_1d.ndim}, y_test : {y_test_1d.ndim}')
+             mylogger.debug(
+                 f'len - X_train_3d : {len(X_train_3d)}, X_test_3d : {len(X_test_3d)}, y_train : {len(y_train_1d)}, y_test : {len(y_test_1d)}')
+
+             return LSTMData(
+                 code=self.code,
+                 data_2d=data_2d,
+                 train_size=train_size,
+                 train_data_2d=train_data_2d,
+                 test_data_2d=test_data_2d,
+                 X_train_3d=X_train_3d,
+                 X_test_3d=X_test_3d,
+                 y_train_1d=y_train_1d,
+                 y_test_1d=y_test_1d,
+             )
+
+         self.raw_data = get_raw_data()
+         self.lstm_data = preprocessing_for_lstm()
+
+     def ensemble_training(self, num) -> Tuple[list, LSTMGrade]:
+         """
+         Trains an ensemble of LSTM models and evaluates them collectively.
+
+         Inner helpers:
+             - model_training: builds and trains a single LSTM model with the configured layers.
+             - prediction: runs a trained model, restores the original scale, and returns the result.
+             - grading: averages the train/test predictions and computes the evaluation metrics.
+
+         `num` models are trained in turn; their predictions on the training set, the test set,
+         and the next `future_days` days are collected, and the ensemble is graded as a whole.
+
+         Arguments:
+             num: int
+                 The number of ensemble LSTM models to train.
+
+         Returns:
+             Tuple[list, LSTMGrade]
+                 A list of rescaled future forecasts (one per model) and the performance grading object.
+
+         Raises:
+             Does not explicitly raise errors but logs warnings for possible issues during training or prediction.
+         """
+         def model_training() -> Sequential:
+             # Build the LSTM model - the numbers of units and the dropout rates were tuned empirically.
+             model = Sequential()
+             mylogger.debug(f"Training data shape - {self.lstm_data.X_train_3d.shape}")
+             try:
+                 # Input(shape=(60, 1)) means 60 time steps per sample, each with a single feature.
+                 model.add(Input(shape=(self.lstm_data.X_train_3d.shape[1], 1)))  # declare the input layer explicitly
+             except IndexError:
+                 mylogger.error("Not enough data to train the model.")
+                 return model
+
+             model.add(LSTM(units=150, return_sequences=True))
+             model.add(Dropout(0.2))
+             model.add(LSTM(units=75, return_sequences=False))
+             model.add(Dropout(0.2))
+             model.add(Dense(units=25))
+             model.add(Dropout(0.3))
+             model.add(Dense(units=1))
+
+             # Print the model summary
+             model.summary()
+
+             # Compile the model
+             model.compile(optimizer='adam', loss='mean_squared_error')
+
+             # Early stopping to avoid overfitting
+             early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
+
+             # Train the model with early stopping enabled
+             model.fit(self.lstm_data.X_train_3d, self.lstm_data.y_train_1d,
+                       epochs=75, batch_size=32, validation_data=(self.lstm_data.X_test_3d, self.lstm_data.y_test_1d),
+                       callbacks=[early_stopping])
+             return model
+
+         def prediction(model_in: Sequential, data: np.ndarray) -> np.ndarray:
+             """
+             Runs the trained model on the given data, undoes the scaling, and returns the predictions.
+             :param model_in: trained model
+             :param data: 3-D input array
+             :return: predictions in the original price scale
+             """
+             predictions_2d = model_in.predict(data)
+             predictions_scaled_2d = self.scaler.inverse_transform(predictions_2d)  # undo the scaling
+             mylogger.info(
+                 f'predictions_scaled_2d : ndim - {predictions_scaled_2d.ndim} len - {len(predictions_scaled_2d)}')  # numpy.ndarray
+             mylogger.debug(f'predictions_scaled_2d[:5] :{predictions_scaled_2d[:5]}')
+             return predictions_scaled_2d
+
+         def grading(train_predictions: list, test_predictions: list) -> LSTMGrade:
+             """
+             Computes the evaluation metrics of the ensemble.
+             :param train_predictions: per-model predictions on the training set
+             :param test_predictions: per-model predictions on the test set
+             :return: LSTMGrade with MSE, MAE and R² for train and test
+             """
+             if len(train_predictions) == 0 or len(test_predictions) == 0:
+                 mylogger.warning("No training results, returning an empty LSTMGrade.")
+                 return LSTMGrade(
+                     code=self.code,
+                     train_mse=float('nan'),
+                     train_mae=float('nan'),
+                     train_r2=float('nan'),
+                     test_mse=float('nan'),
+                     test_mae=float('nan'),
+                     test_r2=float('nan'),
+                 )
+
+             # Average the per-model predictions to get the final prediction
+             mean_train_predictions_2d = np.mean(train_predictions, axis=0)
+             mean_test_predictions_2d = np.mean(test_predictions, axis=0)
+
+             # Undo the normalization of the targets (y)
+             y_train_scaled_2d = self.scaler.inverse_transform(self.lstm_data.y_train_1d.reshape(-1, 1))
+             y_test_scaled_2d = self.scaler.inverse_transform(self.lstm_data.y_test_1d.reshape(-1, 1))
+
+             # Compute the evaluation metrics
+             train_mse = mean_squared_error(y_train_scaled_2d, mean_train_predictions_2d)
+             train_mae = mean_absolute_error(y_train_scaled_2d, mean_train_predictions_2d)
+             train_r2 = r2_score(y_train_scaled_2d, mean_train_predictions_2d)
+
+             test_mse = mean_squared_error(y_test_scaled_2d, mean_test_predictions_2d)
+             test_mae = mean_absolute_error(y_test_scaled_2d, mean_test_predictions_2d)
+             test_r2 = r2_score(y_test_scaled_2d, mean_test_predictions_2d)
+
+             # Print the evaluation results
+             print("Training Data:")
+             print(f"Train MSE: {train_mse}, Train MAE: {train_mae}, Train R²: {train_r2}")
+             print("\nTesting Data:")
+             print(f"Test MSE: {test_mse}, Test MAE: {test_mae}, Test R²: {test_r2}")
+             # Lower MSE/MAE is better; R² lies between 0 and 1 and is better the closer it is to 1.
+             # Compare train and test scores to judge overfitting: a much worse test score suggests overfitting.
+
+             return LSTMGrade(
+                 code=self.code,
+                 train_mse=train_mse,
+                 train_mae=train_mae,
+                 train_r2=train_r2,
+                 test_mse=test_mse,
+                 test_mae=test_mae,
+                 test_r2=test_r2,
+             )
+
+         ensemble_train_predictions_2d = []
+         ensemble_test_predictions_2d = []
+         ensemble_future_predictions_2d = []
+
+         for i in range(num):
+             print(f"Training model {i + 1}/{num}...")
+             model = model_training()
+
+             if len(model.layers) == 0:
+                 mylogger.warning("This model is an empty Sequential().")
+                 return [], grading([], [])
+             else:
+                 mylogger.info("The model has layers.")
+
+             # Predictions on the training data
+             train_predictions_scaled_2d = prediction(model, self.lstm_data.X_train_3d)
+             ensemble_train_predictions_2d.append(train_predictions_scaled_2d)
+
+             # Predictions on the test data
+             test_predictions_scaled_2d = prediction(model, self.lstm_data.X_test_3d)
+             ensemble_test_predictions_2d.append(test_predictions_scaled_2d)
+
+             # Forecast the next 30 days,
+             # based on the last 60 days of (scaled) data
+             last_60_days_2d = self.lstm_data.test_data_2d[-60:]
+             last_60_days_3d = last_60_days_2d.reshape(1, -1, 1)
+
+             future_predictions = []
+             for _ in range(self.future_days):
+                 predicted_price_2d = model.predict(last_60_days_3d)
+                 future_predictions.append(predicted_price_2d[0][0])
+
+                 # Feed the prediction back in as input to produce the next prediction
+                 predicted_price_reshaped = np.reshape(predicted_price_2d, (1, 1, 1))  # reshape to 3-D
+                 last_60_days_3d = np.append(last_60_days_3d[:, 1:, :], predicted_price_reshaped, axis=1)
+
+             # Rescale the predicted prices back to the original scale
+             future_predictions_2d = np.array(future_predictions).reshape(-1, 1)
+             future_predictions_scaled_2d = self.scaler.inverse_transform(future_predictions_2d)
+             ensemble_future_predictions_2d.append(future_predictions_scaled_2d)
+
+         lstm_grade = grading(ensemble_train_predictions_2d, ensemble_test_predictions_2d)
+
+         return ensemble_future_predictions_2d, lstm_grade
+
+     def get_final_predictions(self, refresh: bool, num=5) -> Tuple[dict, LSTMGrade]:
+         """
+         Fetches final predictions from an ensemble of deep learning models. The per-model
+         forecasts are averaged to produce the future prediction; the grading object is mainly
+         useful for debugging, since it is awkward to round-trip through the Redis cache.
+
+         Parameters:
+             refresh (bool): Flag to indicate whether to refresh cached data.
+             num (int, optional): Number of models in the ensemble. Defaults to 5.
+
+         Returns:
+             Tuple[dict, LSTMGrade]: A tuple containing a dictionary of future predictions and their corresponding
+             evaluation grade.
+
+         Raises:
+             AssertionError: If the lengths of future dates and predicted values do not match.
+
+         Notes:
+             - This function integrates ensemble training and caching of predictive data.
+             - The future prediction keys correspond to the dates in "YYYY-MM-DD" format.
+             - Makes use of Redis for data retrieval and caching mechanisms.
+         """
+         print("**** Start get_final_predictions... ****")
+         redis_name = f'{self.code}_mylstm_predictions'
+
+         print(
+             f"redisname: '{redis_name}' / refresh : {refresh} / expire_time : {expire_time/3600}h")
+
+         def fetch_final_predictions(num_in) -> tuple:
+             """
+             Trains the ensemble repeatedly and averages the forecasts to predict the future.
+             The grade is hard to store in the Redis cache, so for now it is used for debugging only.
+             :param num_in: number of models to train
+             :return: (future_data, lstm_grade)
+             """
+             def make_future_data_dict(future_predictions) -> dict:
+                 # Prepare for visualization - build the future dates and average the forecasts
+                 mylogger.debug(self.raw_data)
+                 last_date = self.raw_data.index[-1]
+                 mylogger.debug(f'last_date : {last_date}')
+                 future_dates = pd.date_range(last_date, periods=self.future_days + 1).tolist()[1:]
+                 mylogger.debug(f'future_dates : {future_dates}')
+                 final_future_predictions = np.mean(future_predictions, axis=0).tolist()
+                 mylogger.debug(f'final_future_predictions (predicted prices) : {final_future_predictions}')
+
+                 assert len(future_dates) == len(
+                     final_future_predictions), "future_dates and final_future_predictions differ in length."
+
+                 data = {}
+                 for i in range(len(future_dates)):
+                     data[future_dates[i].strftime("%Y-%m-%d")] = final_future_predictions[i][0]
+                 return data
+
+             # Run the ensemble training
+             future_predictions_2d, lstm_grade = self.ensemble_training(num=num_in)
+             mylogger.debug(f'future_predictions_2d[:5] : {future_predictions_2d[:5]}')
+             mylogger.debug(f'lstm grade (training evaluation) : {lstm_grade}')
+             if len(future_predictions_2d) == 0:
+                 return {}, lstm_grade
+
+             # Build a {date string: predicted price} dictionary
+             future_data = make_future_data_dict(future_predictions_2d)
+             mylogger.debug(f'future_data : {future_data}')
+
+             return future_data, lstm_grade
+
+         return myredis.Base.fetch_and_cache_data(redis_name, refresh, fetch_final_predictions, num, timer=expire_time)
+
+     def export(self, refresh=False, to="str", num=5) -> Optional[str]:
+         """
+         Prepares and exports stock price trend graphs based on past and predicted data.
+         The graphs include markers for actual prices along with a line plot for forecasted prices.
+         The function offers multiple export formats: an HTML string, a PNG file, or an HTML file.
+
+         Parameters
+         ----------
+         refresh : bool, optional
+             Specifies whether to refresh the dataset before preparing future data. Defaults to False.
+         to : str, optional
+             Determines the export format of the graph. Valid choices are
+             'str', 'png', or 'htmlfile'. Defaults to 'str'.
+         num : int, optional
+             Number of models in the prediction ensemble. Defaults to 5.
+
+         Returns
+         -------
+         Optional[str]
+             A string representation of the graph in HTML format if `to` is set to 'str'.
+             Returns None if `to` is set to either 'png' or 'htmlfile'.
+
+         Raises
+         ------
+         Exception
+             Raised if the `to` parameter does not match the allowed values: 'str', 'png', or 'htmlfile'.
+         """
+         def prepare_past_data(past_days) -> tuple:
+             # Prepare the data
+             raw_data_copied = self.raw_data.reset_index()
+             data = raw_data_copied[['Date', 'Close']][-past_days:].reset_index(drop=True)
+
+             # Extract the 'Date' and 'Close' columns
+             past_dates = pd.to_datetime(data['Date'])
+             past_prices = data['Close']
+
+             # Make sure 'past_prices' is a Series
+             if isinstance(past_prices, pd.DataFrame):
+                 past_prices = past_prices.squeeze()
+
+             # Cast the 'Close' column to float
+             past_prices = past_prices.astype(float)
+             return past_dates, past_prices
+
+         def prepare_future_data(refresh_in, num_in) -> tuple:
+             future_data, lstm_grade = self.get_final_predictions(refresh=refresh_in, num=num_in)
+
+             # Prepare the prediction data
+             future_dates = pd.to_datetime(list(future_data.keys()))
+
+             future_prices = pd.Series(list(future_data.values()), index=range(len(future_data))).astype(float)
+             return future_dates, future_prices
+
+         self.initializing()
+         past_dates, past_prices = prepare_past_data(past_days=120)
+         future_dates, future_prices = prepare_future_data(refresh_in=refresh, num_in=num)
+
+         # Build the figure
+         fig = go.Figure()
+
+         # Add the actual prices
+         fig.add_trace(go.Scatter(
+             x=past_dates,
+             y=past_prices,
+             mode='markers',
+             name='Actual price'
+         ))
+
+         # Add the predicted prices
+         fig.add_trace(go.Scatter(
+             x=future_dates,
+             y=future_prices,
+             mode='lines+markers',
+             name='Forecast (30 days)'
+         ))
+
+         mylogger.debug(f"past_dates({len(past_dates)}) - {past_dates}")
+         mylogger.debug(f"past_prices({len(past_prices)}) - {past_prices}")
+         mylogger.debug(f"future_dates({len(future_dates)}) - {future_dates}")
+         mylogger.debug(f"future_prices({len(future_prices)}) - {future_prices}")
+
+         # Update the layout
+         fig.update_layout(
+             # title=f'{self.code} {self.name} price forecast (LSTM)',
+             xaxis_title='Date',
+             yaxis_title='Price (KRW)',
+             xaxis=dict(
+                 tickformat='%Y/%m',  # show the x axis as 'year/month'
+             ),
+             yaxis=dict(
+                 tickformat=".0f",  # plain numbers without decimals
+             ),
+             showlegend=False,
+         )
+
+         if to == 'str':
+             # Convert the figure to an HTML string
+             graph_html = plot(fig, output_type='div')
+             return graph_html
+         elif to == 'png':
+             # Save the figure as a PNG file
+             fig.write_image(f"myLSTM_{self.code}.png")
+             return None
+         elif to == 'htmlfile':
+             # Save the figure as an HTML file
+             plot(fig, filename=f'myLSTM_{self.code}.html', auto_open=False)
+             return None
+         else:
+             raise Exception("Invalid 'to' argument.")
+
+     def visualization(self, refresh=True):
+         """
+         Visualizes actual and predicted stock prices, allowing for an evaluation of the
+         predictions made using a Long Short-Term Memory (LSTM) model. The visualization
+         includes plots for both future predicted prices and historical price comparisons
+         for improved insights.
+
+         Parameters
+         ----------
+         refresh : bool, optional
+             Indicates whether to refresh and retrieve the latest predictions before
+             visualizing. Defaults to True.
+
+         Returns
+         -------
+         None
+         """
+         self.initializing()
+         future_data, _ = self.get_final_predictions(refresh=refresh)
+         mylogger.debug(f'future_data : {future_data}')
+         future_dates = pd.to_datetime(list(future_data.keys()))
+         mylogger.debug(f'future_dates : {future_dates}')
+         future_prices = pd.Series(list(future_data.values()), index=range(len(future_data))).astype(float)
+         mylogger.debug(f'future_prices : {future_prices}')
+
+         # Visualization 1
+         plt.figure(figsize=(10, 6))
+
+         # Actual prices
+         plt.plot(self.raw_data.index, self.raw_data['Close'], label='Actual Price')
+
+         # Predicted future prices
+         plt.plot(future_dates, future_prices, label='Future Predicted Price', linestyle='--')
+
+         plt.xlabel('Date')
+         plt.ylabel('Stock Price')
+         plt.legend()
+         plt.title(f'{self.name} Stock Price Prediction with LSTM')
+         plt.show()
+
+         """# Visualization 2
+         plt.figure(figsize=(10, 6))
+         plt.plot(self.raw_data.index[self.lstm_data.train_size + 60:], self.lstm_data.data_2d[self.lstm_data.train_size + 60:], label='Actual Price')
+         plt.plot(self.raw_data.index[self.lstm_data.train_size + 60:], lstm_grade.mean_test_predictions_2d, label='Predicted Price')
+         plt.xlabel('Date')
+         plt.ylabel('Price')
+         plt.legend()
+         plt.title('Stock Price Prediction with LSTM Ensemble')
+         plt.show()"""
+
+     def is_up(self) -> bool:
+         """
+         Determines if the predicted data indicates an increasing trend.
+
+         This method evaluates the results of future predictions and checks if all the
+         subsequent values in the prediction data increase compared to their predecessors.
+
+         Returns:
+             bool: True if all future predicted values increase in sequence, False otherwise.
+         """
+         # future_data maps date strings to predicted prices
+         future_data, _ = self.get_final_predictions(refresh=False)
+         # flatten the values into a 1-D list
+         flattened_data = list(future_data.values())
+         mylogger.debug(f"flattened_data : {flattened_data}")
+         # check that the values are strictly increasing
+         return all(flattened_data[i] < flattened_data[i + 1] for i in range(len(flattened_data) - 1))
+
+     @staticmethod
+     def caching_based_on_prophet_ranking(refresh: bool, top=20):
+         """
+         This method utilizes a ranking system generated by the Score class for
+         predictive caching.
+
+         Parameters
+         ----------
+         refresh : bool
+             Whether to refresh the predictions for the selected items.
+         top : int, optional
+             The number of top-ranked items to process, by default 20.
+         """
+         ranking_topn = score.Score.ranking(refresh=False, top=top)
+         mylogger.info(ranking_topn)
+         mylstm = MyLSTM('005930')
+         print(f"*** LSTM prediction redis caching top{top} items ***")
+         for i, (code, _) in enumerate(ranking_topn.items()):
+             mylstm.code = code
+             print(f"{i + 1}. {mylstm.code}/{mylstm.name}")
+             mylstm.initializing()
+             mylstm.get_final_predictions(refresh=refresh, num=5)
+
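For orientation, a minimal usage sketch of the MyLSTM class added in this version follows. It is an illustration only, not part of the package diff: the import path is an assumption, while the class and method names come from the code shown above.

    # Hypothetical usage of MyLSTM; the module path is assumed, not taken from the diff.
    from analyser_hj3415.analyser.tsa import MyLSTM

    lstm = MyLSTM('005930')        # 6-digit KRX ticker
    lstm.initializing()            # download ~4 years of prices and build the train/test arrays
    future, grade = lstm.get_final_predictions(refresh=True, num=5)  # 5-model ensemble, cached in Redis
    print(grade)                   # train/test MSE, MAE, R²
    html = lstm.export(to='str')   # Plotly chart as an HTML <div> string
    print(lstm.is_up())            # True if the 30-day forecast rises monotonically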