analyser_hj3415 2.10.6__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analyser_hj3415/__init__.py +13 -0
- analyser_hj3415/analyser/__init__.py +0 -0
- analyser_hj3415/analyser/eval/__init__.py +4 -0
- analyser_hj3415/analyser/eval/blue.py +187 -0
- analyser_hj3415/analyser/eval/common.py +267 -0
- analyser_hj3415/analyser/eval/growth.py +110 -0
- analyser_hj3415/analyser/eval/mil.py +274 -0
- analyser_hj3415/analyser/eval/red.py +295 -0
- analyser_hj3415/{score.py → analyser/score.py} +24 -23
- analyser_hj3415/analyser/tsa/__init__.py +2 -0
- analyser_hj3415/analyser/tsa/lstm.py +670 -0
- analyser_hj3415/analyser/tsa/prophet.py +207 -0
- analyser_hj3415/cli.py +11 -88
- {analyser_hj3415-2.10.6.dist-info → analyser_hj3415-3.0.0.dist-info}/METADATA +3 -3
- analyser_hj3415-3.0.0.dist-info/RECORD +22 -0
- analyser_hj3415/eval.py +0 -960
- analyser_hj3415/tsa.py +0 -708
- analyser_hj3415-2.10.6.dist-info/RECORD +0 -14
- {analyser_hj3415-2.10.6.dist-info → analyser_hj3415-3.0.0.dist-info}/WHEEL +0 -0
- {analyser_hj3415-2.10.6.dist-info → analyser_hj3415-3.0.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,670 @@
|
|
1
|
+
"""
|
2
|
+
Time Series Analysis
|
3
|
+
"""
|
4
|
+
import os
|
5
|
+
import numpy as np
|
6
|
+
import yfinance as yf
|
7
|
+
from datetime import datetime, timedelta
|
8
|
+
import pandas as pd
|
9
|
+
from typing import Optional, Tuple
|
10
|
+
import plotly.graph_objs as go
|
11
|
+
from plotly.offline import plot
|
12
|
+
import matplotlib.pyplot as plt # Matplotlib 수동 임포트
|
13
|
+
from sklearn.preprocessing import MinMaxScaler
|
14
|
+
from tensorflow.keras.models import Sequential
|
15
|
+
from tensorflow.keras.layers import LSTM, Dense, Dropout
|
16
|
+
from tensorflow.keras.callbacks import EarlyStopping
|
17
|
+
from tensorflow.keras import Input
|
18
|
+
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
|
19
|
+
from dataclasses import dataclass
|
20
|
+
|
21
|
+
from utils_hj3415 import tools, setup_logger
|
22
|
+
from db_hj3415 import myredis
|
23
|
+
from analyser_hj3415.analyser import score
|
24
|
+
|
25
|
+
|
26
|
+
# Module-level logger, created with the 'WARNING' level argument.
mylogger = setup_logger(__name__,'WARNING')
# Cache lifetime in seconds: the DEFAULT_EXPIRE_TIME_H environment variable
# (hours, 48 when unset) multiplied by 3600. Passed as `timer` when the
# predictions are cached in get_final_predictions().
expire_time = tools.to_int(os.getenv('DEFAULT_EXPIRE_TIME_H', 48)) * 3600
|
28
|
+
|
29
|
+
|
30
|
+
@dataclass
class LSTMData:
    """Arrays prepared for LSTM training of a single stock code.

    Produced by ``MyLSTM.initializing``. Until that runs (or when there is too
    little data to build a window) every array is empty and ``train_size`` is 0.
    """
    code: str  # stock code the arrays belong to

    data_2d: np.ndarray  # closing prices reshaped to (n, 1), not scaled
    train_size: int  # number of rows in the training split (80% of the rows)
    train_data_2d: np.ndarray  # scaled rows [:train_size]
    test_data_2d: np.ndarray  # scaled rows [train_size:]

    X_train_3d: np.ndarray  # training windows reshaped to (samples, time_step, 1)
    X_test_3d: np.ndarray  # test windows reshaped to (samples, time_step, 1)
    y_train_1d: np.ndarray  # scaled value following each training window
    y_test_1d: np.ndarray  # scaled value following each test window
|
43
|
+
|
44
|
+
|
45
|
+
@dataclass
class LSTMGrade:
    """
    Data class used to evaluate the training results of the deep-learning model.

    The metrics are computed in ``MyLSTM.ensemble_training`` from the averaged
    ensemble predictions after the scaling has been undone; every metric is
    NaN when there were no predictions to grade.
    """
    code: str  # stock code the grade belongs to
    train_mse: float  # mean squared error on the training windows
    train_mae: float  # mean absolute error on the training windows
    train_r2: float  # R^2 score on the training windows
    test_mse: float  # mean squared error on the test windows
    test_mae: float  # mean absolute error on the test windows
    test_r2: float  # R^2 score on the test windows
|
57
|
+
|
58
|
+
|
59
|
+
class MyLSTM:
|
60
|
+
"""
|
61
|
+
LSTM(Long Short-Term Memory)
|
62
|
+
"""
|
63
|
+
# 미래 몇일을 예측할 것인가?
|
64
|
+
future_days = 30
|
65
|
+
|
66
|
+
def __init__(self, code: str):
    """
    Bind the instance to a stock code and start with empty per-code state.

    :param code: stock code; must pass ``tools.is_6digit``.
    :raises AssertionError: when ``code`` fails the ``tools.is_6digit`` check.
    """
    assert tools.is_6digit(code), f'Invalid value : {code}'
    self._code = code
    # Name resolved through myredis.Corps(code, 'c101').
    self.name = myredis.Corps(code, 'c101').get_name()
    self.scaler = MinMaxScaler(feature_range=(0, 1))
    self.raw_data = pd.DataFrame()

    # Placeholder until initializing() runs: every array field receives its
    # own empty array and the training split size is zero.
    array_fields = (
        'data_2d', 'train_data_2d', 'test_data_2d',
        'X_train_3d', 'X_test_3d', 'y_train_1d', 'y_test_1d',
    )
    empty_arrays = {field_name: np.array([]) for field_name in array_fields}
    self.lstm_data = LSTMData(code=self.code, train_size=0, **empty_arrays)
|
83
|
+
|
84
|
+
@property
def code(self) -> str:
    """Return the stock code this instance is currently bound to."""
    return self._code
|
87
|
+
|
88
|
+
@code.setter
def code(self, code: str):
    """
    Re-bind the instance to another stock code and discard all per-code state.

    The company name is looked up again, and the scaler, the downloaded price
    frame and the prepared LSTM arrays are replaced with fresh empty ones, so
    ``initializing()`` has to be called again before training.

    :param code: new stock code; must pass ``tools.is_6digit``.
    :raises AssertionError: when ``code`` fails the ``tools.is_6digit`` check.
    """
    assert tools.is_6digit(code), f'Invalid value : {code}'
    mylogger.debug(f'change code : {self.code} -> {code}')

    self._code = code
    self.name = myredis.Corps(code, 'c101').get_name()
    self.scaler = MinMaxScaler(feature_range=(0, 1))
    self.raw_data = pd.DataFrame()

    # Each array field gets a distinct empty array; the split size is zero.
    blank_arrays = {
        field_name: np.array([])
        for field_name in (
            'data_2d', 'train_data_2d', 'test_data_2d',
            'X_train_3d', 'X_test_3d', 'y_train_1d', 'y_test_1d',
        )
    }
    self.lstm_data = LSTMData(code=self.code, train_size=0, **blank_arrays)
|
108
|
+
|
109
|
+
def initializing(self):
    """
    Download four years of daily prices from Yahoo Finance and prepare them for the LSTM.

    The closing prices are scaled to the 0-1 range with ``self.scaler``, split
    80:20 into a training and a test segment, cut into sliding windows of 60
    time steps and reshaped to three dimensions. The results are stored in
    ``self.raw_data`` and ``self.lstm_data``.

    When nothing could be downloaded, or when either segment is too short to
    yield a single window, ``self.lstm_data`` is set to an empty placeholder
    (empty arrays, ``train_size`` 0) instead of raising.
    """
    def empty_lstm_data() -> LSTMData:
        """Build the placeholder used when there is nothing to train on."""
        return LSTMData(
            code=self.code,
            data_2d=np.array([]),
            train_size=0,
            train_data_2d=np.array([]),
            test_data_2d=np.array([]),
            X_train_3d=np.array([]),
            X_test_3d=np.array([]),
            y_train_1d=np.array([]),
            y_test_1d=np.array([]),
        )

    def get_raw_data() -> pd.DataFrame:
        """
        Fetch four years of raw price data for this code from Yahoo Finance.

        :return: the downloaded frame with a timezone-naive index, or an
            empty DataFrame when the download produced no rows.
        """
        today = datetime.today()

        # Four years is approximated as 365 days * 4.
        four_years_ago = today - timedelta(days=365 * 4)
        mylogger.info(
            f"Get raw data from yfinance - start: {four_years_ago.strftime('%Y-%m-%d')}, end: {today.strftime('%Y-%m-%d')}")

        df = yf.download(
            self.code + '.KS',
            start=four_years_ago.strftime('%Y-%m-%d'),
            end=today.strftime('%Y-%m-%d')
        )
        # A failed or unknown ticker yields no rows; stop here so the caller
        # sees a plain empty frame instead of failing further down.
        if df is None or df.empty:
            mylogger.warning(f"No price data downloaded for {self.code}.KS")
            return pd.DataFrame()

        df.index = df.index.tz_localize(None)
        mylogger.debug(df)
        return df

    def preprocessing_for_lstm() -> LSTMData:
        """
        Prepare the data for the LSTM: scaling, 80:20 train/test split and reshaping.

        :return: the prepared arrays, or the empty placeholder when there is
            no data or not enough rows to build one window per segment.
        """
        mylogger.info("lstm이 사용할 수 있도록 데이터 준비(정규화 및 8:2 훈련데이터 검증데이터 분리 및 차원변환)")
        # The scaler cannot be fitted on zero samples.
        if self.raw_data.empty:
            mylogger.warning(f"No raw data for {self.code}; returning empty LSTMData.")
            return empty_lstm_data()

        # Only the closing price is used - converted to a 2-D column array.
        data_2d = self.raw_data['Close'].values.reshape(-1, 1)
        mylogger.debug(f"종가데이터 2차원베열값[:5] : {data_2d[:5]}")

        # Scale the prices into the 0-1 range.
        scaled_data_2d = self.scaler.fit_transform(data_2d)

        # First 80% of the rows for training, the remaining 20% for testing.
        train_size = int(len(scaled_data_2d) * 0.8)
        train_data_2d = scaled_data_2d[:train_size]
        test_data_2d = scaled_data_2d[train_size:]
        mylogger.info(f'총 {len(data_2d)}개 데이터, train size : {train_size}')

        def create_dataset(data, time_step=60):
            """Return (windows, targets): each window of `time_step` values and the value right after it."""
            X, y = [], []
            for i in range(len(data) - time_step):
                X.append(data[i:i + time_step, 0])
                y.append(data[i + time_step, 0])
            return np.array(X), np.array(y)

        X_train, y_train_1d = create_dataset(train_data_2d)
        X_test, y_test_1d = create_dataset(test_data_2d)
        mylogger.debug(f"훈련데이터 shape (입력, 정답) / {X_train.shape}")
        mylogger.debug(f"테스트데이터 shape (입력, 정답) / {X_test.shape}")

        mylogger.debug("2차원 데이터를 3차원으로 변환합니다.")
        # A segment shorter than one window produces a 1-D empty array that
        # cannot be reshaped to (samples, time_step, 1).
        if X_train.ndim != 2 or X_test.ndim != 2:
            return empty_lstm_data()

        # The LSTM layer expects (samples, time_step, features).
        X_train_3d = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
        X_test_3d = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

        mylogger.debug(
            f'차원 - X_train_3d : {X_train_3d.ndim}, X_test_3d : {X_test_3d.ndim}, y_train : {y_train_1d.ndim}, y_test : {y_test_1d.ndim}')
        mylogger.debug(
            f'len - X_train_3d : {len(X_train_3d)}, X_test_3d : {len(X_test_3d)}, y_train : {len(y_train_1d)}, y_test : {len(y_test_1d)}')

        return LSTMData(
            code=self.code,
            data_2d=data_2d,
            train_size=train_size,
            train_data_2d=train_data_2d,
            test_data_2d=test_data_2d,
            X_train_3d=X_train_3d,
            X_test_3d=X_test_3d,
            y_train_1d=y_train_1d,
            y_test_1d=y_test_1d,
        )

    self.raw_data = get_raw_data()
    self.lstm_data = preprocessing_for_lstm()
|
206
|
+
|
207
|
+
def ensemble_training(self, num) -> Tuple[list, LSTMGrade]:
    """
    Train ``num`` LSTM models on ``self.lstm_data`` and collect their forecasts.

    Every model is trained from scratch, used to predict the training and the
    test windows, and then rolled forward ``self.future_days`` steps by feeding
    each prediction back in as the newest input value. Once all models are
    trained, the per-model train/test predictions are averaged and graded.

    Arguments:
        num: int
            The number of ensemble LSTM models to train.

    Returns:
        Tuple[list, LSTMGrade]
            A list with one ``(future_days, 1)`` array of predicted prices
            (scaling already undone) per trained model, and the grade of the
            averaged train/test predictions. When there is no training data
            the list is empty and every metric of the grade is NaN.

    Raises:
        Does not explicitly raise errors; missing training data is logged and
        reported through the empty result described above.
    """
    def model_training() -> Sequential:
        """Build and fit one model; return an empty ``Sequential`` when there is no training data."""
        # Unit and dropout sizes were chosen by experiment.
        model = Sequential()
        mylogger.debug(f"훈련 데이터 shape - {self.lstm_data.X_train_3d.shape}")
        try:
            # Input(shape=(time_step, 1)): each sample is a window of
            # time_step values with a single feature per step. An empty
            # placeholder array has no second axis, hence the IndexError.
            model.add(Input(shape=(self.lstm_data.X_train_3d.shape[1], 1)))
        except IndexError:
            mylogger.error("모델 트레이닝을 위한 자료가 부족합니다.")
            return model

        model.add(LSTM(units=150, return_sequences=True))
        model.add(Dropout(0.2))
        model.add(LSTM(units=75, return_sequences=False))
        model.add(Dropout(0.2))
        model.add(Dense(units=25))
        model.add(Dropout(0.3))
        model.add(Dense(units=1))

        # Print the model summary.
        model.summary()

        model.compile(optimizer='adam', loss='mean_squared_error')

        # Stop once val_loss has not improved for 5 epochs and keep the best
        # weights, to limit over-fitting.
        early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

        model.fit(self.lstm_data.X_train_3d, self.lstm_data.y_train_1d,
                  epochs=75, batch_size=32, validation_data=(self.lstm_data.X_test_3d, self.lstm_data.y_test_1d),
                  callbacks=[early_stopping])
        return model

    def prediction(model_in: Sequential, data: np.ndarray) -> np.ndarray:
        """
        Predict with a trained model and undo the scaling of the result.

        :param model_in: trained model.
        :param data: input windows passed to ``model_in.predict``.
        :return: predictions after ``self.scaler.inverse_transform``.
        """
        predictions_2d = model_in.predict(data)
        predictions_scaled_2d = self.scaler.inverse_transform(predictions_2d)  # undo scaling
        mylogger.info(
            f'predictions_scaled_2d : ndim - {predictions_scaled_2d.ndim} len - {len(predictions_scaled_2d)}')  # numpy.ndarray
        mylogger.debug(f'predictions_scaled_2d[:5] :{predictions_scaled_2d[:5]}')
        return predictions_scaled_2d

    def grading(train_predictions: list, test_predictions: list) -> LSTMGrade:
        """
        Score the averaged ensemble predictions against the true prices.

        :param train_predictions: per-model predictions for the training windows.
        :param test_predictions: per-model predictions for the test windows.
        :return: MSE, MAE and R^2 for both splits; all NaN when either list is empty.
        """
        if len(train_predictions) == 0 or len(test_predictions) == 0:
            mylogger.warning("딥러닝 결과가 없어서 LSTMGrade 데이터를 비워서 반환합니다.")
            return LSTMGrade(
                code= self.code,
                train_mse=float('nan'),
                train_mae=float('nan'),
                train_r2=float('nan'),
                test_mse=float('nan'),
                test_mae=float('nan'),
                test_r2=float('nan'),
            )

        # Average over the models to obtain the final prediction.
        mean_train_prediction_2d = np.mean(train_predictions, axis=0)
        mean_test_predictions_2d = np.mean(test_predictions, axis=0)

        # Undo the scaling of the targets so the metrics are in price units.
        y_train_scaled_2d = self.scaler.inverse_transform(self.lstm_data.y_train_1d.reshape(-1, 1))
        y_test_scaled_2d = self.scaler.inverse_transform(self.lstm_data.y_test_1d.reshape(-1, 1))

        train_mse = mean_squared_error(y_train_scaled_2d, mean_train_prediction_2d)
        train_mae = mean_absolute_error(y_train_scaled_2d, mean_train_prediction_2d)
        train_r2 = r2_score(y_train_scaled_2d, mean_train_prediction_2d)

        test_mse = mean_squared_error(y_test_scaled_2d, mean_test_predictions_2d)
        test_mae = mean_absolute_error(y_test_scaled_2d, mean_test_predictions_2d)
        test_r2 = r2_score(y_test_scaled_2d, mean_test_predictions_2d)

        print("Training Data:")
        print(f"Train MSE: {train_mse}, Train MAE: {train_mae}, Train R²: {train_r2}")
        print("\nTesting Data:")
        print(f"Test MSE: {test_mse}, Test MAE: {test_mae}, Test R²: {test_r2}")
        # Smaller MSE/MAE is better; R^2 closer to 1 is better. A test score
        # far worse than the train score suggests over-fitting.

        return LSTMGrade(
            code=self.code,
            train_mse=train_mse,
            train_mae=train_mae,
            train_r2=train_r2,
            test_mse=test_mse,
            test_mae=test_mae,
            test_r2=test_r2,
        )

    ensemble_train_predictions_2d = []
    ensemble_test_predictions_2d = []
    ensemble_future_predictions_2d = []

    for i in range(num):
        print(f"Training model {i + 1}/{num}...")
        model = model_training()

        if len(model.layers) == 0:
            mylogger.warning("이 모델은 빈 Sequential() 입니다.")
            return [], grading([],[])
        else:
            mylogger.info("레이어가 있는 모델입니다.")

        # Predict the training windows.
        train_predictions_scaled_2d = prediction(model, self.lstm_data.X_train_3d)
        ensemble_train_predictions_2d.append(train_predictions_scaled_2d)

        # Predict the test windows.
        test_predictions_scaled_2d = prediction(model, self.lstm_data.X_test_3d)
        ensemble_test_predictions_2d.append(test_predictions_scaled_2d)

        # Forecast the future: start from the most recent window of scaled
        # prices. The window length is taken from the training windows so it
        # always matches the shape the model was built for.
        window_size = self.lstm_data.X_train_3d.shape[1]
        last_window_2d = self.lstm_data.test_data_2d[-window_size:]
        last_window_3d = last_window_2d.reshape(1, -1, 1)

        future_predictions = []
        for _ in range(self.future_days):
            predicted_price_2d = model.predict(last_window_3d)
            future_predictions.append(predicted_price_2d[0][0])

            # Slide the window: drop the oldest step, append the prediction.
            predicted_price_reshaped = np.reshape(predicted_price_2d, (1, 1, 1))
            last_window_3d = np.append(last_window_3d[:, 1:, :], predicted_price_reshaped, axis=1)

        # Undo the scaling of the forecast prices.
        future_predictions_2d = np.array(future_predictions).reshape(-1, 1)
        future_predictions_scaled_2d = self.scaler.inverse_transform(future_predictions_2d)
        ensemble_future_predictions_2d.append(future_predictions_scaled_2d)

    lstm_grade = grading(ensemble_train_predictions_2d, ensemble_test_predictions_2d)

    return ensemble_future_predictions_2d, lstm_grade
|
380
|
+
|
381
|
+
def get_final_predictions(self, refresh: bool, num=5) -> Tuple[dict, LSTMGrade]:
    """
    Return the averaged ensemble forecast for this code, going through the Redis cache.

    The work is delegated to ``myredis.Base.fetch_and_cache_data`` under the
    key ``'<code>_mylstm_predictions'`` with ``expire_time`` as the timer; the
    inner fetch function trains the ensemble and averages its forecasts.

    Parameters:
        refresh (bool): Forwarded to the cache helper as its refresh flag.
        num (int, optional): Number of models in the ensemble. Defaults to 5.

    Returns:
        Tuple[dict, LSTMGrade]: ``{"YYYY-MM-DD": predicted price}`` for the
        ``future_days`` calendar days after the last downloaded date, and the
        grade of the ensemble. The dict is empty when no model could be trained.

    Raises:
        AssertionError: If the number of future dates and predicted values differ.
    """
    print("**** Start get_final_predictions... ****")
    redis_name = f'{self.code}_mylstm_predictions'

    print(
        f"redisname: '{redis_name}' / refresh : {refresh} / expire_time : {expire_time/3600}h")

    def fetch_final_predictions(num_in) -> tuple:
        """
        Train the ensemble and average its forecasts into a date-keyed dict.

        :param num_in: number of models to train.
        :return: (future data dict, LSTMGrade).
        """
        def make_future_data_dict(future_predictions) -> dict:
            """Pair each future date with the mean of the per-model forecasts."""
            mylogger.debug(self.raw_data)
            last_date = self.raw_data.index[-1]
            mylogger.debug(f'last_date : {last_date}')
            # The range starts at the last known date, so drop its first entry.
            future_dates = pd.date_range(last_date, periods=self.future_days + 1).tolist()[1:]
            mylogger.debug(f'future_dates : {future_dates}')
            final_future_predictions = np.mean(future_predictions, axis=0).tolist()
            mylogger.debug(f'final_future_predictions(예측주가 리스트) : {final_future_predictions}')

            assert len(future_dates) == len(
                final_future_predictions), "future_dates 와 final_future_predictions 개수가 일치하지 않습니다."

            # Each averaged row is a one-element list; keep its single value.
            return {
                day.strftime("%Y-%m-%d"): row[0]
                for day, row in zip(future_dates, final_future_predictions)
            }

        # Run the ensemble training.
        future_predictions_2d, lstm_grade = self.ensemble_training(num=num_in)
        mylogger.debug(f'future_predictions_2d[:5] : {future_predictions_2d[:5]}')
        mylogger.debug(f'lstm grade(학습결과평과) : {lstm_grade}')
        if len(future_predictions_2d) == 0:
            return {}, lstm_grade

        # Build the {"YYYY-MM-DD": predicted price} dictionary.
        future_data = make_future_data_dict(future_predictions_2d)
        mylogger.debug(f'future_data : {future_data}')

        return future_data, lstm_grade

    return myredis.Base.fetch_and_cache_data(redis_name, refresh, fetch_final_predictions, num, timer=expire_time)
|
447
|
+
|
448
|
+
def export(self, refresh=False, to="str", num=5) -> Optional[str]:
    """
    Build a plotly graph of recent and predicted prices and export it.

    The last 120 downloaded closing prices are drawn as markers and the
    forecast as a line with markers.

    Parameters
    ----------
    refresh : bool, optional
        Forwarded to ``get_final_predictions`` as its refresh flag.
        Defaults to False.
    to : str, optional
        Export format: 'str' (HTML div string), 'png' (writes
        ``myLSTM_<code>.png``) or 'htmlfile' (writes ``myLSTM_<code>.html``).
        Defaults to 'str'.
    num : int, optional
        Number of models in the prediction ensemble. Defaults to 5.

    Returns
    -------
    Optional[str]
        The graph as an HTML div string when `to` is 'str'; None for 'png'
        and 'htmlfile'.

    Raises
    ------
    ValueError
        If `to` is not one of 'str', 'png' or 'htmlfile'. Raised before any
        data is downloaded or any model is trained.
    """
    # Fail fast: an unsupported target used to be silently ignored after all
    # the expensive work had already been done.
    if to not in ('str', 'png', 'htmlfile'):
        raise ValueError("to 인자가 맞지 않습니다.")

    def prepare_past_data(past_days) -> tuple:
        """Return (dates, closing prices) of the last `past_days` downloaded rows."""
        raw_data_copied = self.raw_data.reset_index()
        data = raw_data_copied[['Date', 'Close']][-past_days:].reset_index(drop=True)

        past_dates = pd.to_datetime(data['Date'])
        past_prices = data['Close']

        # 'Close' may come back as a one-column DataFrame; reduce it to a Series.
        if isinstance(past_prices, pd.DataFrame):
            past_prices = past_prices.squeeze()

        past_prices = past_prices.astype(float)
        return past_dates, past_prices

    def prepare_future_data(refresh_in, num_in) -> tuple:
        """Return (dates, predicted prices) from the cached or freshly trained forecast."""
        future_data, _ = self.get_final_predictions(refresh=refresh_in, num=num_in)

        future_dates = pd.to_datetime(list(future_data.keys()))

        future_prices = pd.Series(future_data.values(), index=range(len(future_data.values()))).astype(float)
        return future_dates, future_prices

    self.initializing()
    past_dates, past_prices = prepare_past_data(past_days=120)
    future_dates, future_prices = prepare_future_data(refresh_in=refresh, num_in=num)

    fig = go.Figure()

    # Actual prices.
    fig.add_trace(go.Scatter(
        x=past_dates,
        y=past_prices,
        mode='markers',
        name='실제주가'
    ))

    # Predicted prices.
    fig.add_trace(go.Scatter(
        x=future_dates,
        y=future_prices,
        mode='lines+markers',
        name='예측치(30일)'
    ))

    mylogger.debug(f"past_dates({len(past_dates)}) - {past_dates}")
    mylogger.debug(f"past_prices({len(past_prices)} - {past_prices}")
    mylogger.debug(f"future_dates({len(future_dates)}) - {future_dates}")
    mylogger.debug(f"future_prices({len(future_prices)}) - {future_prices}")

    # Single layout update. The layout used to be set twice with the second
    # call overriding the first, so the effective settings (legend hidden)
    # are the ones kept here.
    fig.update_layout(
        xaxis_title='일자',
        yaxis_title='주가(원)',
        xaxis=dict(
            tickformat='%Y/%m',  # show the x axis as year/month
        ),
        yaxis=dict(
            tickformat=".0f",  # plain numbers without decimals
        ),
        showlegend=False,
    )

    if to == 'str':
        # Render the graph as an HTML div string.
        return plot(fig, output_type='div')
    if to == 'png':
        fig.write_image(f"myLSTM_{self.code}.png")
        return None
    # Only 'htmlfile' is left after the validation at the top.
    plot(fig, filename=f'myLSTM_{self.code}.html', auto_open=False)
    return None
|
569
|
+
|
570
|
+
def visualization(self, refresh=True):
    """
    Plot the downloaded closing prices together with the LSTM forecast using matplotlib.

    The data is (re)initialised first, the forecast is obtained through
    ``get_final_predictions`` and both series are shown in one figure.

    Parameters
    ----------
    refresh : bool, optional
        Forwarded to ``get_final_predictions`` as its refresh flag.
        Defaults to True.

    Returns
    -------
    None
    """
    self.initializing()
    future_data, _ = self.get_final_predictions(refresh=refresh)
    mylogger.debug(f'future_data : {future_data}')

    future_dates = pd.to_datetime(list(future_data.keys()))
    mylogger.debug(f'future_dates : {future_dates}')

    predicted_values = future_data.values()
    future_prices = pd.Series(predicted_values, index=range(len(predicted_values))).astype(float)
    mylogger.debug(f'future_prices : {future_prices}')

    plt.figure(figsize=(10, 6))

    # Actual prices first, then the dashed forecast line.
    series_to_draw = (
        (self.raw_data.index, self.raw_data['Close'], {'label': 'Actual Price'}),
        (future_dates, future_prices, {'label': 'Future Predicted Price', 'linestyle': '--'}),
    )
    for x_values, y_values, line_options in series_to_draw:
        plt.plot(x_values, y_values, **line_options)

    plt.xlabel('Date')
    plt.ylabel('Stock Price')
    plt.legend()
    plt.title(f'{self.name} Stock Price Prediction with LSTM')
    plt.show()
|
623
|
+
|
624
|
+
def is_up(self) -> bool:
    """
    Tell whether the forecast rises strictly from every day to the next.

    The forecast is read through ``get_final_predictions(refresh=False)``.

    Returns:
        bool: True only if there are at least two predicted values and each
        one is greater than the one before it. False otherwise, including
        when no prediction is available.
    """
    future_data, _ = self.get_final_predictions(refresh=False)
    # Predicted prices in date order (the dict is built in that order).
    predicted_prices = list(future_data.values())
    mylogger.debug(f"flattened_data : {predicted_prices}")

    # all() over an empty sequence is True, which would report a rising trend
    # for a stock that has no forecast at all.
    if len(predicted_prices) < 2:
        mylogger.warning(f"{self.code}: not enough predicted values to determine a trend.")
        return False

    return all(earlier < later for earlier, later in zip(predicted_prices, predicted_prices[1:]))
|
644
|
+
|
645
|
+
@staticmethod
def caching_based_on_prophet_ranking(refresh: bool, top=20):
    """
    Compute and cache LSTM predictions for the top-ranked codes.

    The ranking comes from ``score.Score.ranking(refresh=False, top=top)``;
    for every code in it the data is initialised and
    ``get_final_predictions`` is called with an ensemble of 5 models.

    Parameters
    ----------
    refresh : bool
        Forwarded to ``get_final_predictions`` for every code.
    top : int, optional
        The number of top-ranked items to process, by default 20.
    """
    ranking_topn = score.Score.ranking(refresh=False, top=top)
    mylogger.info(ranking_topn)

    # '005930' only seeds the instance; the code is reassigned for every item.
    mylstm = MyLSTM('005930')
    print(f"*** LSTM prediction redis cashing top{top} items ***")
    for position, (ranked_code, _) in enumerate(ranking_topn.items(), start=1):
        mylstm.code = ranked_code
        print(f"{position}. {mylstm.code}/{mylstm.name}")
        mylstm.initializing()
        mylstm.get_final_predictions(refresh=refresh, num=5)
|
667
|
+
|
668
|
+
|
669
|
+
|
670
|
+
|