akshare-one 0.3.1-py3-none-any.whl → 0.3.3-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- akshare_one/__init__.py +214 -31
- akshare_one/indicators.py +395 -395
- akshare_one/modules/cache.py +10 -9
- akshare_one/modules/eastmoney/client.py +88 -88
- akshare_one/modules/eastmoney/utils.py +104 -104
- akshare_one/modules/financial/base.py +27 -22
- akshare_one/modules/financial/eastmoney.py +184 -0
- akshare_one/modules/financial/factory.py +46 -44
- akshare_one/modules/financial/sina.py +298 -273
- akshare_one/modules/historical/base.py +47 -47
- akshare_one/modules/historical/eastmoney.py +241 -241
- akshare_one/modules/historical/eastmoney_direct.py +79 -79
- akshare_one/modules/historical/factory.py +48 -48
- akshare_one/modules/historical/sina.py +254 -254
- akshare_one/modules/indicators/base.py +158 -158
- akshare_one/modules/indicators/factory.py +33 -33
- akshare_one/modules/indicators/simple.py +230 -230
- akshare_one/modules/indicators/talib.py +263 -263
- akshare_one/modules/info/base.py +25 -0
- akshare_one/modules/info/eastmoney.py +52 -0
- akshare_one/modules/info/factory.py +44 -0
- akshare_one/modules/insider/base.py +28 -28
- akshare_one/modules/insider/factory.py +44 -44
- akshare_one/modules/insider/xueqiu.py +115 -115
- akshare_one/modules/news/base.py +22 -22
- akshare_one/modules/news/eastmoney.py +47 -47
- akshare_one/modules/news/factory.py +44 -44
- akshare_one/modules/realtime/base.py +27 -27
- akshare_one/modules/realtime/eastmoney.py +57 -57
- akshare_one/modules/realtime/eastmoney_direct.py +37 -37
- akshare_one/modules/realtime/factory.py +48 -48
- akshare_one/modules/realtime/xueqiu.py +60 -60
- akshare_one/modules/utils.py +10 -10
- {akshare_one-0.3.1.dist-info → akshare_one-0.3.3.dist-info}/METADATA +70 -70
- akshare_one-0.3.3.dist-info/RECORD +39 -0
- {akshare_one-0.3.1.dist-info → akshare_one-0.3.3.dist-info}/licenses/LICENSE +21 -21
- akshare_one/financial.py +0 -46
- akshare_one/insider.py +0 -33
- akshare_one/news.py +0 -27
- akshare_one/stock.py +0 -78
- akshare_one-0.3.1.dist-info/RECORD +0 -39
- {akshare_one-0.3.1.dist-info → akshare_one-0.3.3.dist-info}/WHEEL +0 -0
- {akshare_one-0.3.1.dist-info → akshare_one-0.3.3.dist-info}/top_level.txt +0 -0
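The file list shows the legacy top-level modules (financial.py, insider.py, news.py, stock.py) being removed while __init__.py grows substantially, and a new modules/info package appearing alongside the existing provider packages. A minimal usage sketch of what that consolidation likely means for callers, assuming the public functions are re-exported from the package root as the expanded __init__.py suggests; the function names below are an assumption for illustration, not taken from this diff:

# Hypothetical usage sketch: the re-exported names are assumed, not confirmed by this diff.
import akshare_one as ao  # package root presumably re-exports the public API (assumption)

# Historical candles, served by the modules/historical providers
df = ao.get_hist_data(symbol="600000", interval="day")

# Basic company info, presumably backed by the new modules/info/eastmoney.py provider (assumption)
info = ao.get_basic_info(symbol="600000")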
--- akshare_one/modules/historical/base.py (0.3.1)
+++ akshare_one/modules/historical/base.py (0.3.3)
@@ -1,47 +1,47 @@
-from abc import ABC, abstractmethod
-import pandas as pd
-
-
-class HistoricalDataProvider(ABC):
-    def __init__(
-        self,
-        symbol: str,
-        interval: str = "day",
-        interval_multiplier: int = 1,
-        start_date: str = "1970-01-01",
-        end_date: str = "2030-12-31",
-        adjust: str = "none",
-    ) -> None:
-        self.symbol = symbol
-        self.interval = interval
-        self.interval_multiplier = interval_multiplier
-        self.start_date = start_date
-        self.end_date = end_date
-        self.adjust = adjust
-        self._validate_dates()
-
-    def _validate_dates(self):
-        try:
-            pd.to_datetime(self.start_date)
-            pd.to_datetime(self.end_date)
-        except ValueError:
-            raise ValueError("Invalid date format. Please use YYYY-MM-DD.")
-
-    @classmethod
-    def get_supported_intervals(cls):
-        return ["minute", "hour", "day", "week", "month", "year"]
-
-    @abstractmethod
-    def get_hist_data(self) -> pd.DataFrame:
-        """Fetches historical market data
-
-        Returns:
-            pd.DataFrame:
-            - timestamp (UTC)
-            - open
-            - high
-            - low
-            - close
-            - volume
-        """
-        pass
+from abc import ABC, abstractmethod
+import pandas as pd
+
+
+class HistoricalDataProvider(ABC):
+    def __init__(
+        self,
+        symbol: str,
+        interval: str = "day",
+        interval_multiplier: int = 1,
+        start_date: str = "1970-01-01",
+        end_date: str = "2030-12-31",
+        adjust: str = "none",
+    ) -> None:
+        self.symbol = symbol
+        self.interval = interval
+        self.interval_multiplier = interval_multiplier
+        self.start_date = start_date
+        self.end_date = end_date
+        self.adjust = adjust
+        self._validate_dates()
+
+    def _validate_dates(self):
+        try:
+            pd.to_datetime(self.start_date)
+            pd.to_datetime(self.end_date)
+        except ValueError:
+            raise ValueError("Invalid date format. Please use YYYY-MM-DD.")
+
+    @classmethod
+    def get_supported_intervals(cls):
+        return ["minute", "hour", "day", "week", "month", "year"]
+
+    @abstractmethod
+    def get_hist_data(self) -> pd.DataFrame:
+        """Fetches historical market data
+
+        Returns:
+            pd.DataFrame:
+            - timestamp (UTC)
+            - open
+            - high
+            - low
+            - close
+            - volume
+        """
+        pass
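For reference, a minimal provider built on the HistoricalDataProvider base class shown above might look like the following. This is an illustrative sketch only: the subclass name and the constant data are invented, and it assumes akshare-one and its dependencies are installed so the import resolves.

# Illustrative only: a stub provider implementing the abstract interface above.
import pandas as pd
from akshare_one.modules.historical.base import HistoricalDataProvider


class DummyHistorical(HistoricalDataProvider):
    def get_hist_data(self) -> pd.DataFrame:
        # Return a single fabricated bar with the documented output columns.
        return pd.DataFrame(
            {
                "timestamp": [pd.Timestamp("2024-01-02", tz="UTC")],
                "open": [10.0],
                "high": [10.5],
                "low": [9.8],
                "close": [10.2],
                "volume": [1_000_000],
            }
        )


provider = DummyHistorical(symbol="600000", interval="day")
print(provider.get_supported_intervals())  # ['minute', 'hour', 'day', 'week', 'month', 'year']
print(provider.get_hist_data())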
--- akshare_one/modules/historical/eastmoney.py (0.3.1)
+++ akshare_one/modules/historical/eastmoney.py (0.3.3)
@@ -1,241 +1,241 @@
-from cachetools import cached
-from .base import HistoricalDataProvider
-import akshare as ak
-import pandas as pd
-from ..cache import CACHE_CONFIG
-
-
-class EastMoneyHistorical(HistoricalDataProvider):
-    """Adapter for EastMoney historical stock data API"""
-
-    @cached(
-        cache=CACHE_CONFIG["hist_data_cache"],
-        key=lambda self: f"eastmoney_hist_{self.symbol}_{self.interval}_{self.interval_multiplier}_{self.adjust}",
-    )
-    def get_hist_data(self) -> pd.DataFrame:
-        """Fetches EastMoney historical market data
-
-        Returns:
-            pd.DataFrame:
-            - timestamp
-            - open
-            - high
-            - low
-            - close
-            - volume
-        """
-        self.interval = self.interval.lower()
-        self._validate_interval_params(self.interval, self.interval_multiplier)
-
-        try:
-            if self.interval in ["minute", "hour"]:
-                df = self._get_intraday_data()
-            else:
-                df = self._get_daily_plus_data()
-
-            return df
-        except Exception as e:
-            raise ValueError(f"Failed to fetch historical data: {str(e)}")
-
-    def _get_intraday_data(self) -> pd.DataFrame:
-        """Fetches intraday data at minute or hour intervals"""
-        # Set trading hours
-        start_date = self._ensure_time_format(self.start_date, "09:30:00")
-        end_date = self._ensure_time_format(self.end_date, "15:00:00")
-
-        # Get raw data
-        period = "1" if self.interval == "minute" else "60"
-        raw_df = ak.stock_zh_a_hist_min_em(
-            symbol=self.symbol,
-            period=period,
-            start_date=start_date,
-            end_date=end_date,
-            adjust=self._map_adjust_param(self.adjust),
-        )
-
-        # Process data
-        resampled = self._resample_intraday_data(
-            raw_df,
-            f"{self.interval_multiplier}min"
-            if self.interval == "minute"
-            else f"{self.interval_multiplier}h",
-        )
-        return self._clean_minute_data(resampled, str(self.interval_multiplier))
-
-    def _get_daily_plus_data(self) -> pd.DataFrame:
-        """Fetches daily and higher-level data (day/week/month/year)"""
-        start_date = self._convert_date_format(self.start_date)
-        end_date = self._convert_date_format(self.end_date)
-
-        period_map = {
-            "day": "daily",
-            "week": "weekly",
-            "month": "monthly",
-            "year": "monthly",
-        }
-        period = period_map[self.interval]
-
-        raw_df = ak.stock_zh_a_hist(
-            symbol=self.symbol,
-            period=period,
-            start_date=start_date,
-            end_date=end_date,
-            adjust=self._map_adjust_param(self.adjust),
-        )
-
-        if self.interval == "year":
-            self.interval_multiplier *= 12
-
-        if self.interval_multiplier > 1:
-            raw_df = self._resample_data(
-                raw_df, self.interval, self.interval_multiplier
-            )
-
-        return self._clean_data(raw_df)
-
-    def _validate_interval_params(self, interval: str, multiplier: int) -> None:
-        """Validates the validity of interval and multiplier"""
-        if interval not in self.get_supported_intervals():
-            raise ValueError(f"Unsupported interval parameter: {interval}")
-
-        if interval in ["minute", "hour"] and multiplier < 1:
-            raise ValueError(f"interval_multiplier for {interval} level must be ≥ 1")
-
-    def _ensure_time_format(self, date_str: str, default_time: str) -> str:
-        """Ensures the date string includes the time part"""
-        if " " not in date_str:
-            return f"{date_str} {default_time}"
-        return date_str
-
-    def _convert_date_format(self, date_str: str) -> str:
-        """Converts date format from YYYY-MM-DD to YYYYMMDD"""
-        return date_str.replace("-", "") if "-" in date_str else date_str
-
-    def _map_adjust_param(self, adjust: str) -> str:
-        """Maps adjustment parameters to the required format"""
-        return adjust if adjust != "none" else ""
-
-    def _resample_intraday_data(self, df: pd.DataFrame, freq: str) -> pd.DataFrame:
-        """Resamples intraday data to the specified frequency"""
-        df["时间"] = pd.to_datetime(df["时间"])
-        df = df.set_index("时间")
-        resampled = df.resample(freq).agg(
-            {
-                "开盘": "first",
-                "最高": "max",
-                "最低": "min",
-                "收盘": "last",
-                "成交量": "sum",
-                "成交额": "sum",
-            }
-        )
-        return resampled.reset_index()
-
-    def _resample_data(
-        self, df: pd.DataFrame, interval: str, multiplier: int
-    ) -> pd.DataFrame:
-        """Resamples daily and higher-level data to the specified interval"""
-        freq_map = {
-            "day": f"{multiplier}D",
-            "week": f"{multiplier}W-MON",
-            "month": f"{multiplier}MS",
-            "year": f"{multiplier}AS-JAN",
-        }
-        freq = freq_map[interval]
-
-        df["日期"] = pd.to_datetime(df["日期"])
-        df = df.set_index("日期")
-        resampled = df.resample(freq).agg(
-            {
-                "开盘": "first",
-                "最高": "max",
-                "最低": "min",
-                "收盘": "last",
-                "成交量": "sum",
-            }
-        )
-        return resampled.reset_index()
-
-    def _clean_minute_data(self, raw_df: pd.DataFrame, period: str) -> pd.DataFrame:
-        """Cleans and standardizes minute/hour level data, converting timestamps to UTC"""
-        column_map = {
-            "1": {
-                "时间": "timestamp",
-                "开盘": "open",
-                "收盘": "close",
-                "最高": "high",
-                "最低": "low",
-                "成交量": "volume",
-                "成交额": "amount",
-                "均价": "vwap",
-            },
-            "default": {
-                "时间": "timestamp",
-                "开盘": "open",
-                "收盘": "close",
-                "最高": "high",
-                "最低": "low",
-                "涨跌幅": "pct_change",
-                "涨跌额": "change",
-                "成交量": "volume",
-                "成交额": "amount",
-                "振幅": "amplitude",
-                "换手率": "turnover",
-            },
-        }
-
-        mapping = column_map["1"] if period == "1" else column_map["default"]
-        df = raw_df.rename(columns=mapping)
-
-        if "timestamp" in df.columns:
-            df["timestamp"] = (
-                pd.to_datetime(df["timestamp"])
-                .dt.tz_localize("Asia/Shanghai")
-                .dt.tz_convert("UTC")
-            )
-
-        return self._select_standard_columns(df)
-
-    def _clean_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
-        """Cleans and standardizes daily and higher-level data, converting timestamps to UTC"""
-        column_map = {
-            "日期": "timestamp",
-            "开盘": "open",
-            "收盘": "close",
-            "最高": "high",
-            "最低": "low",
-            "成交量": "volume",
-        }
-
-        available_columns = {
-            src: target for src, target in column_map.items() if src in raw_df.columns
-        }
-
-        if not available_columns:
-            raise ValueError("Expected columns not found in raw data")
-
-        df = raw_df.rename(columns=available_columns)
-
-        if "timestamp" in df.columns:
-            df["timestamp"] = (
-                pd.to_datetime(df["timestamp"])
-                .dt.tz_localize("Asia/Shanghai")
-                .dt.tz_convert("UTC")
-            )
-
-        if "volume" in df.columns:
-            df["volume"] = df["volume"].astype("int64")
-
-        return self._select_standard_columns(df)
-
-    def _select_standard_columns(self, df: pd.DataFrame) -> pd.DataFrame:
-        """Selects and orders the standard output columns"""
-        standard_columns = [
-            "timestamp",
-            "open",
-            "high",
-            "low",
-            "close",
-            "volume",
-        ]
-        return df[[col for col in standard_columns if col in df.columns]]
+from cachetools import cached
+from .base import HistoricalDataProvider
+import akshare as ak
+import pandas as pd
+from ..cache import CACHE_CONFIG
+
+
+class EastMoneyHistorical(HistoricalDataProvider):
+    """Adapter for EastMoney historical stock data API"""
+
+    @cached(
+        cache=CACHE_CONFIG["hist_data_cache"],
+        key=lambda self: f"eastmoney_hist_{self.symbol}_{self.interval}_{self.interval_multiplier}_{self.adjust}",
+    )
+    def get_hist_data(self) -> pd.DataFrame:
+        """Fetches EastMoney historical market data
+
+        Returns:
+            pd.DataFrame:
+            - timestamp
+            - open
+            - high
+            - low
+            - close
+            - volume
+        """
+        self.interval = self.interval.lower()
+        self._validate_interval_params(self.interval, self.interval_multiplier)
+
+        try:
+            if self.interval in ["minute", "hour"]:
+                df = self._get_intraday_data()
+            else:
+                df = self._get_daily_plus_data()
+
+            return df
+        except Exception as e:
+            raise ValueError(f"Failed to fetch historical data: {str(e)}")
+
+    def _get_intraday_data(self) -> pd.DataFrame:
+        """Fetches intraday data at minute or hour intervals"""
+        # Set trading hours
+        start_date = self._ensure_time_format(self.start_date, "09:30:00")
+        end_date = self._ensure_time_format(self.end_date, "15:00:00")
+
+        # Get raw data
+        period = "1" if self.interval == "minute" else "60"
+        raw_df = ak.stock_zh_a_hist_min_em(
+            symbol=self.symbol,
+            period=period,
+            start_date=start_date,
+            end_date=end_date,
+            adjust=self._map_adjust_param(self.adjust),
+        )
+
+        # Process data
+        resampled = self._resample_intraday_data(
+            raw_df,
+            f"{self.interval_multiplier}min"
+            if self.interval == "minute"
+            else f"{self.interval_multiplier}h",
+        )
+        return self._clean_minute_data(resampled, str(self.interval_multiplier))
+
+    def _get_daily_plus_data(self) -> pd.DataFrame:
+        """Fetches daily and higher-level data (day/week/month/year)"""
+        start_date = self._convert_date_format(self.start_date)
+        end_date = self._convert_date_format(self.end_date)
+
+        period_map = {
+            "day": "daily",
+            "week": "weekly",
+            "month": "monthly",
+            "year": "monthly",
+        }
+        period = period_map[self.interval]
+
+        raw_df = ak.stock_zh_a_hist(
+            symbol=self.symbol,
+            period=period,
+            start_date=start_date,
+            end_date=end_date,
+            adjust=self._map_adjust_param(self.adjust),
+        )
+
+        if self.interval == "year":
+            self.interval_multiplier *= 12
+
+        if self.interval_multiplier > 1:
+            raw_df = self._resample_data(
+                raw_df, self.interval, self.interval_multiplier
+            )
+
+        return self._clean_data(raw_df)
+
+    def _validate_interval_params(self, interval: str, multiplier: int) -> None:
+        """Validates the validity of interval and multiplier"""
+        if interval not in self.get_supported_intervals():
+            raise ValueError(f"Unsupported interval parameter: {interval}")
+
+        if interval in ["minute", "hour"] and multiplier < 1:
+            raise ValueError(f"interval_multiplier for {interval} level must be ≥ 1")
+
+    def _ensure_time_format(self, date_str: str, default_time: str) -> str:
+        """Ensures the date string includes the time part"""
+        if " " not in date_str:
+            return f"{date_str} {default_time}"
+        return date_str
+
+    def _convert_date_format(self, date_str: str) -> str:
+        """Converts date format from YYYY-MM-DD to YYYYMMDD"""
+        return date_str.replace("-", "") if "-" in date_str else date_str
+
+    def _map_adjust_param(self, adjust: str) -> str:
+        """Maps adjustment parameters to the required format"""
+        return adjust if adjust != "none" else ""
+
+    def _resample_intraday_data(self, df: pd.DataFrame, freq: str) -> pd.DataFrame:
+        """Resamples intraday data to the specified frequency"""
+        df["时间"] = pd.to_datetime(df["时间"])
+        df = df.set_index("时间")
+        resampled = df.resample(freq).agg(
+            {
+                "开盘": "first",
+                "最高": "max",
+                "最低": "min",
+                "收盘": "last",
+                "成交量": "sum",
+                "成交额": "sum",
+            }
+        )
+        return resampled.reset_index()
+
+    def _resample_data(
+        self, df: pd.DataFrame, interval: str, multiplier: int
+    ) -> pd.DataFrame:
+        """Resamples daily and higher-level data to the specified interval"""
+        freq_map = {
+            "day": f"{multiplier}D",
+            "week": f"{multiplier}W-MON",
+            "month": f"{multiplier}MS",
+            "year": f"{multiplier}AS-JAN",
+        }
+        freq = freq_map[interval]
+
+        df["日期"] = pd.to_datetime(df["日期"])
+        df = df.set_index("日期")
+        resampled = df.resample(freq).agg(
+            {
+                "开盘": "first",
+                "最高": "max",
+                "最低": "min",
+                "收盘": "last",
+                "成交量": "sum",
+            }
+        )
+        return resampled.reset_index()
+
+    def _clean_minute_data(self, raw_df: pd.DataFrame, period: str) -> pd.DataFrame:
+        """Cleans and standardizes minute/hour level data, converting timestamps to UTC"""
+        column_map = {
+            "1": {
+                "时间": "timestamp",
+                "开盘": "open",
+                "收盘": "close",
+                "最高": "high",
+                "最低": "low",
+                "成交量": "volume",
+                "成交额": "amount",
+                "均价": "vwap",
+            },
+            "default": {
+                "时间": "timestamp",
+                "开盘": "open",
+                "收盘": "close",
+                "最高": "high",
+                "最低": "low",
+                "涨跌幅": "pct_change",
+                "涨跌额": "change",
+                "成交量": "volume",
+                "成交额": "amount",
+                "振幅": "amplitude",
+                "换手率": "turnover",
+            },
+        }
+
+        mapping = column_map["1"] if period == "1" else column_map["default"]
+        df = raw_df.rename(columns=mapping)
+
+        if "timestamp" in df.columns:
+            df["timestamp"] = (
+                pd.to_datetime(df["timestamp"])
+                .dt.tz_localize("Asia/Shanghai")
+                .dt.tz_convert("UTC")
+            )
+
+        return self._select_standard_columns(df)
+
+    def _clean_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
+        """Cleans and standardizes daily and higher-level data, converting timestamps to UTC"""
+        column_map = {
+            "日期": "timestamp",
+            "开盘": "open",
+            "收盘": "close",
+            "最高": "high",
+            "最低": "low",
+            "成交量": "volume",
+        }
+
+        available_columns = {
+            src: target for src, target in column_map.items() if src in raw_df.columns
+        }
+
+        if not available_columns:
+            raise ValueError("Expected columns not found in raw data")
+
+        df = raw_df.rename(columns=available_columns)
+
+        if "timestamp" in df.columns:
+            df["timestamp"] = (
+                pd.to_datetime(df["timestamp"])
+                .dt.tz_localize("Asia/Shanghai")
+                .dt.tz_convert("UTC")
+            )
+
+        if "volume" in df.columns:
+            df["volume"] = df["volume"].astype("int64")
+
+        return self._select_standard_columns(df)
+
+    def _select_standard_columns(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Selects and orders the standard output columns"""
+        standard_columns = [
+            "timestamp",
+            "open",
+            "high",
+            "low",
+            "close",
+            "volume",
+        ]
+        return df[[col for col in standard_columns if col in df.columns]]
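A usage sketch for the EastMoneyHistorical adapter above: it fetches data live through akshare, so it needs network access, and the symbol, date range, and adjust value below are placeholders chosen for illustration rather than values taken from this diff.

# Illustrative only: fetch daily bars via the adapter defined above.
from akshare_one.modules.historical.eastmoney import EastMoneyHistorical

provider = EastMoneyHistorical(
    symbol="600000",       # placeholder A-share code
    interval="day",
    interval_multiplier=1,
    start_date="2024-01-01",
    end_date="2024-06-30",
    adjust="qfq",          # passed through to akshare by _map_adjust_param
)

df = provider.get_hist_data()  # columns: timestamp (UTC), open, high, low, close, volume
print(df.head())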