analyser_hj3415 2.10.6__py3-none-any.whl → 3.0.0__py3-none-any.whl
- analyser_hj3415/__init__.py +13 -0
- analyser_hj3415/analyser/__init__.py +0 -0
- analyser_hj3415/analyser/eval/__init__.py +4 -0
- analyser_hj3415/analyser/eval/blue.py +187 -0
- analyser_hj3415/analyser/eval/common.py +267 -0
- analyser_hj3415/analyser/eval/growth.py +110 -0
- analyser_hj3415/analyser/eval/mil.py +274 -0
- analyser_hj3415/analyser/eval/red.py +295 -0
- analyser_hj3415/{score.py → analyser/score.py} +24 -23
- analyser_hj3415/analyser/tsa/__init__.py +2 -0
- analyser_hj3415/analyser/tsa/lstm.py +670 -0
- analyser_hj3415/analyser/tsa/prophet.py +207 -0
- analyser_hj3415/cli.py +11 -88
- {analyser_hj3415-2.10.6.dist-info → analyser_hj3415-3.0.0.dist-info}/METADATA +3 -3
- analyser_hj3415-3.0.0.dist-info/RECORD +22 -0
- analyser_hj3415/eval.py +0 -960
- analyser_hj3415/tsa.py +0 -708
- analyser_hj3415-2.10.6.dist-info/RECORD +0 -14
- {analyser_hj3415-2.10.6.dist-info → analyser_hj3415-3.0.0.dist-info}/WHEEL +0 -0
- {analyser_hj3415-2.10.6.dist-info → analyser_hj3415-3.0.0.dist-info}/entry_points.txt +0 -0
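Version 3.0.0 breaks up the old flat module layout: `eval.py`, `tsa.py`, and `score.py` leave the package root and their contents reappear under the new `analyser_hj3415.analyser` package, so import paths change with this major release. A minimal migration sketch, assuming the submodules are importable directly (module names are taken from the file list above; the exact public API is not shown in this diff):

    # Hypothetical import-path migration for 2.10.6 -> 3.0.0 (names from the file list above).
    # Old layout (2.10.6): flat, top-level modules
    #   from analyser_hj3415 import eval, tsa, score
    # New layout (3.0.0): nested packages
    from analyser_hj3415.analyser import score                        # was analyser_hj3415/score.py
    from analyser_hj3415.analyser.tsa import lstm, prophet            # replaces analyser_hj3415/tsa.py
    from analyser_hj3415.analyser.eval import red, blue, mil, growth  # replaces analyser_hj3415/eval.py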
@@ -0,0 +1,670 @@
"""
Time Series Analysis
"""
import os
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
import pandas as pd
from typing import Optional, Tuple
import plotly.graph_objs as go
from plotly.offline import plot
import matplotlib.pyplot as plt  # manual Matplotlib import
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import Input
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from dataclasses import dataclass

from utils_hj3415 import tools, setup_logger
from db_hj3415 import myredis
from analyser_hj3415.analyser import score


mylogger = setup_logger(__name__, 'WARNING')
expire_time = tools.to_int(os.getenv('DEFAULT_EXPIRE_TIME_H', 48)) * 3600


@dataclass
class LSTMData:
    code: str

    data_2d: np.ndarray
    train_size: int
    train_data_2d: np.ndarray
    test_data_2d: np.ndarray

    X_train_3d: np.ndarray
    X_test_3d: np.ndarray
    y_train_1d: np.ndarray
    y_test_1d: np.ndarray


@dataclass
class LSTMGrade:
    """
    Data class used to evaluate the training results of the deep-learning model.
    """
    code: str
    train_mse: float
    train_mae: float
    train_r2: float
    test_mse: float
    test_mae: float
    test_r2: float


class MyLSTM:
    """
    LSTM(Long Short-Term Memory)
    """
    # How many days into the future to predict?
    future_days = 30

    def __init__(self, code: str):
        assert tools.is_6digit(code), f'Invalid value : {code}'
        self._code = code
        self.name = myredis.Corps(code, 'c101').get_name()
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.raw_data = pd.DataFrame()
        self.lstm_data = LSTMData(
            code=self.code,
            data_2d=np.array([]),
            train_size=0,
            train_data_2d=np.array([]),
            test_data_2d=np.array([]),
            X_train_3d=np.array([]),
            X_test_3d=np.array([]),
            y_train_1d=np.array([]),
            y_test_1d=np.array([]),
        )

    @property
    def code(self) -> str:
        return self._code

    @code.setter
    def code(self, code: str):
        assert tools.is_6digit(code), f'Invalid value : {code}'
        mylogger.debug(f'change code : {self.code} -> {code}')

        self._code = code
        self.name = myredis.Corps(code, 'c101').get_name()
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.raw_data = pd.DataFrame()
        self.lstm_data = LSTMData(
            code=self.code,
            data_2d=np.array([]),
            train_size=0,
            train_data_2d=np.array([]),
            test_data_2d=np.array([]),
            X_train_3d=np.array([]),
            X_test_3d=np.array([]),
            y_train_1d=np.array([]),
            y_test_1d=np.array([]),
        )

    def initializing(self):
        """
        Fetches stock price data for the last four years from Yahoo Finance and prepares
        it for use in an LSTM model by normalizing, splitting into training and testing datasets,
        and reshaping the data.
        """
        def get_raw_data() -> pd.DataFrame:
            """
            Downloads four years of raw price data for this ticker from Yahoo Finance.
            :return:
            """
            # Today's date
            today = datetime.today()

            # Date four years ago (4 years = 365 days * 4)
            four_years_ago = today - timedelta(days=365 * 4)
            mylogger.info(
                f"Get raw data from yfinance - start: {four_years_ago.strftime('%Y-%m-%d')}, end: {today.strftime('%Y-%m-%d')}")

            df = yf.download(
                self.code + '.KS',
                start=four_years_ago.strftime('%Y-%m-%d'),
                end=today.strftime('%Y-%m-%d')
            )
            df.index = df.index.tz_localize(None)
            mylogger.debug(df)
            return df

        def preprocessing_for_lstm() -> LSTMData:
            """
            Prepares the data for the LSTM (normalization, 80:20 train/test split, reshaping).
            :return:
            """
            mylogger.info("lstm이 사용할 수 있도록 데이터 준비(정규화 및 8:2 훈련데이터 검증데이터 분리 및 차원변환)")
            # Keep only the column we need (closing price) - convert to a 2-D array
            data_2d = self.raw_data['Close'].values.reshape(-1, 1)
            mylogger.debug(f"종가데이터 2차원배열값[:5] : {data_2d[:5]}")

            # Normalize the data (scale between 0 and 1)
            scaled_data_2d = self.scaler.fit_transform(data_2d)

            # Build the training data
            # Split the price data 80% for training and 20% for testing
            train_size = int(len(scaled_data_2d) * 0.8)
            train_data_2d = scaled_data_2d[:train_size]
            test_data_2d = scaled_data_2d[train_size:]
            mylogger.info(f'총 {len(data_2d)}개 데이터, train size : {train_size}')

            # Build the inputs (X) and targets (y) for training
            def create_dataset(data, time_step=60):
                X, y = [], []
                for i in range(len(data) - time_step):
                    X.append(data[i:i + time_step, 0])
                    y.append(data[i + time_step, 0])
                return np.array(X), np.array(y)

            X_train, y_train_1d = create_dataset(train_data_2d)
            X_test, y_test_1d = create_dataset(test_data_2d)
            mylogger.debug(f"훈련데이터 shape (입력, 정답) / {X_train.shape}")
            mylogger.debug(f"테스트데이터 shape (입력, 정답) / {X_test.shape}")

            try:
                mylogger.debug("2차원 데이터를 3차원으로 변환합니다.")
                # Reshape the data to 3-D for the LSTM model input
                X_train_3d = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
                X_test_3d = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
            except IndexError:
                return LSTMData(
                    code=self.code,
                    data_2d=np.array([]),
                    train_size=0,
                    train_data_2d=np.array([]),
                    test_data_2d=np.array([]),
                    X_train_3d=np.array([]),
                    X_test_3d=np.array([]),
                    y_train_1d=np.array([]),
                    y_test_1d=np.array([]),
                )

            mylogger.debug(
                f'차원 - X_train_3d : {X_train_3d.ndim}, X_test_3d : {X_test_3d.ndim}, y_train : {y_train_1d.ndim}, y_test : {y_test_1d.ndim}')
            mylogger.debug(
                f'len - X_train_3d : {len(X_train_3d)}, X_test_3d : {len(X_test_3d)}, y_train : {len(y_train_1d)}, y_test : {len(y_test_1d)}')

            return LSTMData(
                code=self.code,
                data_2d=data_2d,
                train_size=train_size,
                train_data_2d=train_data_2d,
                test_data_2d=test_data_2d,
                X_train_3d=X_train_3d,
                X_test_3d=X_test_3d,
                y_train_1d=y_train_1d,
                y_test_1d=y_test_1d,
            )
        self.raw_data = get_raw_data()
        self.lstm_data = preprocessing_for_lstm()

    def ensemble_training(self, num) -> Tuple[list, LSTMGrade]:
        """
        Represents the implementation for training, predicting, and evaluating an LSTM-based deep learning model.
        Defines functions for LSTM model training, prediction, grading results, and ensemble model training.

        Methods defined:
            - model_training: Constructs and trains the LSTM model with specified layers and configurations.
            - prediction: Uses a trained model to perform predictions, restores normalization, and returns results.
            - grading: Analyzes the training and testing predictions, computes evaluation metrics, and returns grading details.
            - ensemble_training: Trains multiple LSTM models to create ensemble predictions and future forecasts.

        ensemble_training:
            Trains multiple LSTM models iteratively and forms ensemble predictions for training, testing datasets,
            and future trend forecasting. Evaluates the models collectively using grading metrics.

        Arguments:
            num: int
                The number of ensemble LSTM models to train.

        Returns:
            Tuple[list, LSTMGrade]
                A list of predicted scaled values for future forecasts and the performance grading object.

        Raises:
            Does not explicitly raise errors but logs warnings for possible issues during training or prediction.
        """
        def model_training() -> Sequential:
            # Build the LSTM model - the unit and dropout counts were tuned experimentally.
            model = Sequential()
            mylogger.debug(f"훈련 데이터 shape - {self.lstm_data.X_train_3d.shape}")
            try:
                # e.g. Input(shape=(50, 1)) handles inputs with 50 time steps, each carrying one feature
                model.add(Input(shape=(self.lstm_data.X_train_3d.shape[1], 1)))  # use an explicit Input layer
            except IndexError:
                mylogger.error("모델 트레이닝을 위한 자료가 부족합니다.")
                return model

            model.add(LSTM(units=150, return_sequences=True))
            model.add(Dropout(0.2))
            model.add(LSTM(units=75, return_sequences=False))
            model.add(Dropout(0.2))
            model.add(Dense(units=25))
            model.add(Dropout(0.3))
            model.add(Dense(units=1))

            # Print the model summary
            model.summary()

            # Compile and train the model
            model.compile(optimizer='adam', loss='mean_squared_error')

            # Early-stopping setup
            early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

            # Train the model - early stopping configured to prevent overfitting
            model.fit(self.lstm_data.X_train_3d, self.lstm_data.y_train_1d,
                      epochs=75, batch_size=32, validation_data=(self.lstm_data.X_test_3d, self.lstm_data.y_test_1d),
                      callbacks=[early_stopping])
            return model

        def prediction(model_in: Sequential, data: np.ndarray) -> np.ndarray:
            """
            Runs predictions with the trained model, reverses the normalization, and returns the result.
            :param model_in:
            :param data:
            :return:
            """
            predictions_2d = model_in.predict(data)
            predictions_scaled_2d = self.scaler.inverse_transform(predictions_2d)  # undo the scaling
            mylogger.info(
                f'predictions_scaled_2d : ndim - {predictions_scaled_2d.ndim} len - {len(predictions_scaled_2d)}')  # numpy.ndarray
            mylogger.debug(f'predictions_scaled_2d[:5] :{predictions_scaled_2d[:5]}')
            return predictions_scaled_2d

        def grading(train_predictions: list, test_predictions: list) -> LSTMGrade:
            """
            Evaluates the deep-learning results.
            :param train_predictions:
            :param test_predictions:
            :return:
            """
            if len(train_predictions) == 0 or len(test_predictions) == 0:
                mylogger.warning("딥러닝 결과가 없어서 LSTMGrade 데이터를 비워서 반환합니다.")
                return LSTMGrade(
                    code=self.code,
                    train_mse=float('nan'),
                    train_mae=float('nan'),
                    train_r2=float('nan'),
                    test_mse=float('nan'),
                    test_mae=float('nan'),
                    test_r2=float('nan'),
                )

            # Average the predictions to get the final prediction
            mean_train_prediction_2d = np.mean(train_predictions, axis=0)
            mean_test_predictions_2d = np.mean(test_predictions, axis=0)

            # Undo the normalization of the y values (targets)
            y_train_scaled_2d = self.scaler.inverse_transform(self.lstm_data.y_train_1d.reshape(-1, 1))
            y_test_scaled_2d = self.scaler.inverse_transform(self.lstm_data.y_test_1d.reshape(-1, 1))

            # Compute the evaluation metrics
            train_mse = mean_squared_error(y_train_scaled_2d, mean_train_prediction_2d)
            train_mae = mean_absolute_error(y_train_scaled_2d, mean_train_prediction_2d)
            train_r2 = r2_score(y_train_scaled_2d, mean_train_prediction_2d)

            test_mse = mean_squared_error(y_test_scaled_2d, mean_test_predictions_2d)
            test_mae = mean_absolute_error(y_test_scaled_2d, mean_test_predictions_2d)
            test_r2 = r2_score(y_test_scaled_2d, mean_test_predictions_2d)

            # Print the evaluation results
            print("Training Data:")
            print(f"Train MSE: {train_mse}, Train MAE: {train_mae}, Train R²: {train_r2}")
            print("\nTesting Data:")
            print(f"Test MSE: {test_mse}, Test MAE: {test_mae}, Test R²: {test_r2}")
            # Lower MSE/MAE is better; R² lies between 0 and 1, and values closer to 1 are more accurate.
            # To judge overfitting, compare train and test; if test is much worse, suspect overfitting.

            return LSTMGrade(
                code=self.code,
                train_mse=train_mse,
                train_mae=train_mae,
                train_r2=train_r2,
                test_mse=test_mse,
                test_mae=test_mae,
                test_r2=test_r2,
            )

        ensemble_train_predictions_2d = []
        ensemble_test_predictions_2d = []
        ensemble_future_predictions_2d = []

        for i in range(num):
            print(f"Training model {i + 1}/{num}...")
            model = model_training()

            if len(model.layers) == 0:
                mylogger.warning("이 모델은 빈 Sequential() 입니다.")
                return [], grading([], [])
            else:
                mylogger.info("레이어가 있는 모델입니다.")

            # Predictions on the training data
            train_predictions_scaled_2d = prediction(model, self.lstm_data.X_train_3d)
            ensemble_train_predictions_2d.append(train_predictions_scaled_2d)

            # Predictions on the test data
            test_predictions_scaled_2d = prediction(model, self.lstm_data.X_test_3d)
            ensemble_test_predictions_2d.append(test_predictions_scaled_2d)

            # 8. Forecast the next 30 days
            # Predict the next 30 days based on the last 60 days of data

            last_60_days_2d = self.lstm_data.test_data_2d[-60:]
            last_60_days_3d = last_60_days_2d.reshape(1, -1, 1)

            future_predictions = []
            for _ in range(self.future_days):
                predicted_price_2d = model.predict(last_60_days_3d)
                future_predictions.append(predicted_price_2d[0][0])

                # Feed the prediction back in as input to produce the next prediction
                predicted_price_reshaped = np.reshape(predicted_price_2d, (1, 1, 1))  # convert to a 3-D array
                last_60_days_3d = np.append(last_60_days_3d[:, 1:, :], predicted_price_reshaped, axis=1)

            # Undo the scaling of the predicted prices
            future_predictions_2d = np.array(future_predictions).reshape(-1, 1)
            future_predictions_scaled_2d = self.scaler.inverse_transform(future_predictions_2d)
            ensemble_future_predictions_2d.append(future_predictions_scaled_2d)

        lstm_grade = grading(ensemble_train_predictions_2d, ensemble_test_predictions_2d)

        return ensemble_future_predictions_2d, lstm_grade

    def get_final_predictions(self, refresh: bool, num=5) -> Tuple[dict, LSTMGrade]:
        """
        Fetches final predictions based on an ensemble method using deep learning models. This process averages the
        predictions to forecast future data, primarily for debugging purposes as it is challenging to utilize
        Redis cache for evaluation.

        Parameters:
            refresh (bool): Flag to indicate whether to refresh cached data.
            num (int, optional): Number of ensemble models trained for the prediction. Defaults to 5.

        Returns:
            Tuple[dict, LSTMGrade]: A tuple containing a dictionary of future predictions and their corresponding
            evaluation grade.

        Raises:
            AssertionError: If the lengths of future dates and predicted values do not match.

        Notes:
            - This function integrates ensemble training and caching of predictive data.
            - The future prediction keys correspond to the dates in "YYYY-MM-DD" format.
            - Makes use of Redis for data retrieval and caching mechanisms.
        """
        print("**** Start get_final_predictions... ****")
        redis_name = f'{self.code}_mylstm_predictions'

        print(
            f"redisname: '{redis_name}' / refresh : {refresh} / expire_time : {expire_time/3600}h")

        def fetch_final_predictions(num_in) -> tuple:
            """
            Trains the deep-learning model repeatedly as an ensemble and averages the results to predict the future.
            The grade is hard to return through the Redis cache, so for now it is only used for debugging.
            :param num_in:
            :return:
            """
            def make_future_data_dict(future_predictions) -> dict:
                # Prepare for visualization - generate the future prediction dates and average the future predictions
                mylogger.debug(self.raw_data)
                last_date = self.raw_data.index[-1]
                mylogger.debug(f'last_date : {last_date}')
                future_dates = pd.date_range(last_date, periods=self.future_days + 1).tolist()[1:]
                mylogger.debug(f'future_dates : {future_dates}')
                final_future_predictions = np.mean(future_predictions, axis=0).tolist()
                mylogger.debug(f'final_future_predictions(예측주가 리스트) : {final_future_predictions}')

                assert len(future_dates) == len(
                    final_future_predictions), "future_dates 와 final_future_predictions 개수가 일치하지 않습니다."

                data = {}
                for i in range(len(future_dates)):
                    data[future_dates[i].strftime("%Y-%m-%d")] = final_future_predictions[i][0]
                return data

            # Run the ensemble training
            future_predictions_2d, lstm_grade = self.ensemble_training(num=num_in)
            mylogger.debug(f'future_predictions_2d[:5] : {future_predictions_2d[:5]}')
            mylogger.debug(f'lstm grade(학습결과평가) : {lstm_grade}')
            if len(future_predictions_2d) == 0:
                return {}, lstm_grade

            # Build a dictionary in the form {date: predicted price}
            future_data = make_future_data_dict(future_predictions_2d)
            mylogger.debug(f'future_data : {future_data}')

            return future_data, lstm_grade

        return myredis.Base.fetch_and_cache_data(redis_name, refresh, fetch_final_predictions, num, timer=expire_time)

    def export(self, refresh=False, to="str", num=5) -> Optional[str]:
        """
        Prepares and exports stock price trend graphs based on past and predicted data.
        The graphs include markers for actual prices along with a line-plot for forecasted prices.
        The function offers multiple export formats: an HTML string, a PNG file, or an HTML file.

        Parameters
        ----------
        refresh : bool, optional
            Specifies whether to refresh the dataset before preparing future data. Defaults to False.
        to : str, optional
            Determines the export format of the graph. Valid choices are
            'str', 'png', or 'htmlfile'. Defaults to 'str'.
        num : int, optional
            Specifies the number of ensemble models used for the prediction. Defaults to 5.

        Returns
        -------
        Optional[str]
            A string representation of the graph in HTML format if `to` is set to 'str'.
            Returns None if `to` is set to either 'png' or 'htmlfile'.

        Raises
        ------
        Exception
            Raised if the `to` parameter does not match the allowed values: 'str', 'png', or 'htmlfile'.
        """
        def prepare_past_data(past_days) -> tuple:
            # Prepare the data
            raw_data_copied = self.raw_data.reset_index()
            data = raw_data_copied[['Date', 'Close']][-past_days:].reset_index(drop=True)

            # Extract the 'Date' and 'Close' columns
            past_dates = pd.to_datetime(data['Date'])
            past_prices = data['Close']

            # Make sure 'past_prices' is a Series
            if isinstance(past_prices, pd.DataFrame):
                past_prices = past_prices.squeeze()

            # Cast the 'Close' column to float
            past_prices = past_prices.astype(float)
            return past_dates, past_prices

        def prepare_future_data(refresh_in, num_in) -> tuple:
            future_data, lstm_grade = self.get_final_predictions(refresh=refresh_in, num=num_in)

            # Prepare the prediction data
            future_dates = pd.to_datetime(list(future_data.keys()))

            future_prices = pd.Series(future_data.values(), index=range(len(future_data.values()))).astype(float)
            return future_dates, future_prices

        self.initializing()
        past_dates, past_prices = prepare_past_data(past_days=120)
        future_dates, future_prices = prepare_future_data(refresh_in=refresh, num_in=num)

        # Create the figure
        fig = go.Figure()

        # Add the actual data
        fig.add_trace(go.Scatter(
            x=past_dates,
            y=past_prices,
            mode='markers',
            name='실제주가'
        ))

        # Add the prediction data
        fig.add_trace(go.Scatter(
            x=future_dates,
            y=future_prices,
            mode='lines+markers',
            name='예측치(30일)'
        ))

        # Update the layout
        fig.update_layout(
            xaxis_title='일자',
            yaxis_title='주가(원)',
            xaxis=dict(
                tickformat='%Y/%m',
            ),
            yaxis=dict(
                tickformat=".0f",
            ),
            showlegend=True,
        )

        mylogger.debug(f"past_dates({len(past_dates)}) - {past_dates}")
        mylogger.debug(f"past_prices({len(past_prices)}) - {past_prices}")
        mylogger.debug(f"future_dates({len(future_dates)}) - {future_dates}")
        mylogger.debug(f"future_prices({len(future_prices)}) - {future_prices}")

        fig.update_layout(
            # title=f'{self.code} {self.name} 주가 예측 그래프(prophet)',
            xaxis_title='일자',
            yaxis_title='주가(원)',
            xaxis=dict(
                tickformat='%Y/%m',  # show the X axis as 'YYYY/MM'
            ),
            yaxis=dict(
                tickformat=".0f",  # show whole numbers without decimals
            ),
            showlegend=False,
        )

        if to == 'str':
            # Convert the figure to HTML (returned as a string)
            graph_html = plot(fig, output_type='div')
            return graph_html
        elif to == 'png':
            # Save the figure as a PNG file
            fig.write_image(f"myLSTM_{self.code}.png")
            return None
        elif to == 'htmlfile':
            # Save the figure as an HTML file
            plot(fig, filename=f'myLSTM_{self.code}.html', auto_open=False)
            return None
        else:
            raise Exception("to 인자가 맞지 않습니다.")

    def visualization(self, refresh=True):
        """
        Visualizes actual and predicted stock prices, allowing for an evaluation of the
        predictions made using a Long Short-Term Memory (LSTM) model. The visualization
        includes plots for both future predicted prices and historical price comparisons
        for improved insights.

        Parameters
        ----------
        refresh : bool, optional
            Indicates whether to refresh and retrieve the latest predictions before
            visualizing. Defaults to True.

        Raises
        ------
        None

        Returns
        -------
        None
        """
        self.initializing()
        future_data, _ = self.get_final_predictions(refresh=refresh)
        mylogger.debug(f'future_data : {future_data}')
        future_dates = pd.to_datetime(list(future_data.keys()))
        mylogger.debug(f'future_dates : {future_dates}')
        future_prices = pd.Series(future_data.values(), index=range(len(future_data.values()))).astype(float)
        mylogger.debug(f'future_prices : {future_prices}')

        # Visualization 1
        plt.figure(figsize=(10, 6))

        # Actual prices
        plt.plot(self.raw_data.index, self.raw_data['Close'], label='Actual Price')

        # Predicted future prices
        plt.plot(future_dates, future_prices, label='Future Predicted Price', linestyle='--')

        plt.xlabel('Date')
        plt.ylabel('Stock Price')
        plt.legend()
        plt.title(f'{self.name} Stock Price Prediction with LSTM')
        plt.show()

        """# Visualization 2
        plt.figure(figsize=(10, 6))
        plt.plot(self.raw_data.index[self.lstm_data.train_size + 60:], self.lstm_data.data_2d[self.lstm_data.train_size + 60:], label='Actual Price')
        plt.plot(self.raw_data.index[self.lstm_data.train_size + 60:], lstm_grade.mean_test_predictions_2d, label='Predicted Price')
        plt.xlabel('Date')
        plt.ylabel('Price')
        plt.legend()
        plt.title('Stock Price Prediction with LSTM Ensemble')
        plt.show()"""

    def is_up(self) -> bool:
        """
        Determines if the predicted data indicates an increasing trend.

        This method evaluates the results of future predictions and checks if all the
        subsequent values in the prediction data increase compared to their predecessors.

        Returns:
            bool: True if all future predicted values increase in sequence, False otherwise.

        Raises:
            None: This method does not raise any exceptions.
        """
        # In the returned tuple, [0] is the dates and [1] is the array of values
        future_data, _ = self.get_final_predictions(refresh=False)
        # Flatten the data into a 1-D list
        flattened_data = list(future_data.values())
        mylogger.debug(f"flattened_data : {flattened_data}")
        # Check whether the values keep increasing
        return all(flattened_data[i] < flattened_data[i + 1] for i in range(len(flattened_data) - 1))

    @staticmethod
    def caching_based_on_prophet_ranking(refresh: bool, top=20):
        """
        This method utilizes a ranking system generated by the Score class for
        predictive caching.

        Parameters
        ----------
        refresh : bool
            Whether to refresh the predictions for the selected items.
        top : int, optional
            The number of top-ranked items to process, by default 20.
        """
        ranking_topn = score.Score.ranking(refresh=False, top=top)
        mylogger.info(ranking_topn)
        mylstm = MyLSTM('005930')
        print(f"*** LSTM prediction redis caching top{top} items ***")
        for i, (code, _) in enumerate(ranking_topn.items()):
            mylstm.code = code
            print(f"{i + 1}. {mylstm.code}/{mylstm.name}")
            mylstm.initializing()
            mylstm.get_final_predictions(refresh=refresh, num=5)