akshare-one 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- akshare_one/__init__.py +31 -31
- akshare_one/financial.py +46 -46
- akshare_one/indicators.py +395 -0
- akshare_one/insider.py +33 -33
- akshare_one/modules/cache.py +9 -9
- akshare_one/modules/eastmoney/client.py +88 -88
- akshare_one/modules/eastmoney/utils.py +104 -104
- akshare_one/modules/financial/base.py +22 -22
- akshare_one/modules/financial/factory.py +44 -44
- akshare_one/modules/financial/sina.py +273 -273
- akshare_one/modules/historical/base.py +47 -39
- akshare_one/modules/historical/eastmoney.py +241 -241
- akshare_one/modules/historical/eastmoney_direct.py +79 -79
- akshare_one/modules/historical/factory.py +48 -48
- akshare_one/modules/historical/sina.py +218 -218
- akshare_one/modules/indicators/__init__.py +0 -0
- akshare_one/modules/indicators/base.py +158 -0
- akshare_one/modules/indicators/factory.py +33 -0
- akshare_one/modules/indicators/simple.py +230 -0
- akshare_one/modules/indicators/talib.py +263 -0
- akshare_one/modules/insider/base.py +28 -28
- akshare_one/modules/insider/factory.py +44 -44
- akshare_one/modules/insider/xueqiu.py +115 -115
- akshare_one/modules/news/base.py +22 -22
- akshare_one/modules/news/eastmoney.py +47 -47
- akshare_one/modules/news/factory.py +44 -44
- akshare_one/modules/realtime/base.py +27 -27
- akshare_one/modules/realtime/eastmoney.py +57 -57
- akshare_one/modules/realtime/eastmoney_direct.py +37 -37
- akshare_one/modules/realtime/factory.py +48 -48
- akshare_one/modules/realtime/xueqiu.py +60 -60
- akshare_one/modules/utils.py +10 -10
- akshare_one/news.py +27 -27
- akshare_one/stock.py +78 -78
- {akshare_one-0.2.2.dist-info → akshare_one-0.3.0.dist-info}/METADATA +70 -66
- akshare_one-0.3.0.dist-info/RECORD +39 -0
- {akshare_one-0.2.2.dist-info → akshare_one-0.3.0.dist-info}/licenses/LICENSE +21 -21
- akshare_one-0.2.2.dist-info/RECORD +0 -33
- {akshare_one-0.2.2.dist-info → akshare_one-0.3.0.dist-info}/WHEEL +0 -0
- {akshare_one-0.2.2.dist-info → akshare_one-0.3.0.dist-info}/top_level.txt +0 -0
akshare_one/modules/historical/factory.py (identified from the class and imports below)
@@ -1,48 +1,48 @@
All 48 lines of this file are removed and re-added; the removed and the added text is identical as displayed, so the content is shown once:

from .base import HistoricalDataProvider
from .eastmoney import EastMoneyHistorical
from .eastmoney_direct import EastMoneyDirectHistorical
from .sina import SinaHistorical


class HistoricalDataFactory:
    """
    Factory class for creating historical data providers
    """

    _providers = {
        "eastmoney": EastMoneyHistorical,
        "eastmoney_direct": EastMoneyDirectHistorical,
        "sina": SinaHistorical,
    }

    @classmethod
    def get_provider(cls, provider_name: str, **kwargs) -> HistoricalDataProvider:
        """
        Get a historical data provider by name

        Args:
            provider_name: Name of the provider (e.g., 'eastmoney')
            **kwargs: Additional arguments to pass to the provider's constructor

        Returns:
            HistoricalDataProvider: An instance of the requested provider

        Raises:
            ValueError: If the requested provider is not found
        """
        provider_class = cls._providers.get(provider_name.lower())
        if not provider_class:
            raise ValueError(f"Unknown historical data provider: {provider_name}")

        return provider_class(**kwargs)

    @classmethod
    def register_provider(cls, name: str, provider_class: type):
        """
        Register a new historical data provider

        Args:
            name: Name to associate with this provider
            provider_class: The provider class to register
        """
        cls._providers[name.lower()] = provider_class
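The hunk above documents the provider-factory pattern used across the package: get_provider looks up a registered class by name (case-insensitively) and instantiates it with whatever keyword arguments the caller supplies, and register_provider lets external code plug in additional providers at runtime. A minimal usage sketch follows; the constructor keyword arguments are assumptions inferred from the attributes SinaHistorical reads in the next hunk, and the base-class requirements are not part of this diff:

    # Hypothetical usage sketch of HistoricalDataFactory. The constructor kwargs
    # below are assumptions (the HistoricalDataProvider base class is not shown
    # in this diff); the names match the attributes SinaHistorical reads.
    from akshare_one.modules.historical.base import HistoricalDataProvider
    from akshare_one.modules.historical.factory import HistoricalDataFactory

    provider = HistoricalDataFactory.get_provider(
        "sina",
        symbol="600000",          # assumed constructor kwarg
        interval="day",           # assumed constructor kwarg
        interval_multiplier=1,    # assumed constructor kwarg
        adjust="none",            # assumed constructor kwarg
        start_date="2024-01-01",  # assumed constructor kwarg
        end_date="2024-12-31",    # assumed constructor kwarg
    )
    df = provider.get_hist_data()  # standardized OHLCV frame (see the Sina hunk below)


    # register_provider accepts any class; this one is hypothetical and exists
    # only to illustrate runtime registration.
    class MyCsvHistorical(HistoricalDataProvider):
        def get_hist_data(self):
            raise NotImplementedError


    HistoricalDataFactory.register_provider("my_csv", MyCsvHistorical)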
akshare_one/modules/historical/sina.py (identified from the class and imports below)
@@ -1,218 +1,218 @@
All 218 lines of this file are removed and re-added; the removed and the added text is identical as displayed, so the content is shown once:

from cachetools import cached
from .base import HistoricalDataProvider
import akshare as ak
import pandas as pd
from ..cache import CACHE_CONFIG


class SinaHistorical(HistoricalDataProvider):
    """Adapter for Sina historical stock data API"""

    @cached(
        cache=CACHE_CONFIG["hist_data_cache"],
        key=lambda self: f"sina_hist_{self.symbol}_{self.interval}_{self.interval_multiplier}_{self.adjust}",
    )
    def get_hist_data(self) -> pd.DataFrame:
        """Fetches Sina historical market data

        Returns:
            pd.DataFrame:
                - timestamp
                - open
                - high
                - low
                - close
                - volume
        """
        self.interval = self.interval.lower()
        self._validate_interval_params(self.interval, self.interval_multiplier)

        try:
            stock = (
                f"sh{self.symbol}"
                if not self.symbol.startswith(("sh", "sz", "bj"))
                else self.symbol
            )

            if self.interval == "minute":
                df = self._get_minute_data(stock)
            elif self.interval == "hour":
                df = self._get_hour_data(stock)
            else:
                df = self._get_daily_plus_data(stock)

            return df
        except Exception as e:
            raise ValueError(f"Failed to fetch historical data: {str(e)}")

    def _get_minute_data(self, stock: str) -> pd.DataFrame:
        """Fetches minute level data"""
        raw_df = ak.stock_zh_a_minute(
            symbol=stock,
            period="1",
            adjust=self._map_adjust_param(self.adjust),
        )
        raw_df = raw_df.rename(columns={"day": "date"})
        raw_df["date"] = pd.to_datetime(raw_df["date"])
        raw_df = raw_df.set_index("date")
        raw_df = (
            raw_df.resample(f"{self.interval_multiplier}min")
            .agg(
                {
                    "open": "first",
                    "high": "max",
                    "low": "min",
                    "close": "last",
                    "volume": "sum",
                }
            )
            .reset_index()
        )
        return self._clean_minute_data(raw_df)

    def _get_hour_data(self, stock: str) -> pd.DataFrame:
        """Fetches hour level data"""
        if self.interval_multiplier < 1:
            raise ValueError("Hour interval multiplier must be >= 1")

        raw_df = ak.stock_zh_a_minute(
            symbol=stock,
            period="60",
            adjust=self._map_adjust_param(self.adjust),
        )
        raw_df = raw_df.rename(columns={"day": "date"})
        raw_df["date"] = pd.to_datetime(raw_df["date"])
        raw_df = raw_df.set_index("date")
        raw_df = (
            raw_df.resample(f"{self.interval_multiplier}h")
            .agg(
                {
                    "open": "first",
                    "high": "max",
                    "low": "min",
                    "close": "last",
                    "volume": "sum",
                }
            )
            .reset_index()
        )
        return self._clean_minute_data(raw_df)

    def _get_daily_plus_data(self, stock: str) -> pd.DataFrame:
        """Fetches daily and higher-level data (day/week/month/year)"""
        start_date = self._convert_date_format(self.start_date)
        end_date = self._convert_date_format(self.end_date)

        raw_df = ak.stock_zh_a_daily(
            symbol=stock,
            start_date=start_date,
            end_date=end_date,
            adjust=self._map_adjust_param(self.adjust),
        )

        if self.interval_multiplier > 1:
            raw_df = self._resample_data(
                raw_df, self.interval, self.interval_multiplier
            )

        return self._clean_data(raw_df)

    def _validate_interval_params(self, interval: str, multiplier: int) -> None:
        """Validates the validity of interval and multiplier"""
        if interval not in self.get_supported_intervals():
            raise ValueError(f"Unsupported interval parameter: {interval}")

        if interval in ["minute", "hour"] and multiplier < 1:
            raise ValueError(f"interval_multiplier for {interval} level must be ≥ 1")

    def _convert_date_format(self, date_str: str) -> str:
        """Converts date format from YYYY-MM-DD to YYYYMMDD"""
        return date_str.replace("-", "") if "-" in date_str else date_str

    def _map_adjust_param(self, adjust: str) -> str:
        """Maps adjustment parameters to the required format"""
        return adjust if adjust != "none" else ""

    def _resample_data(
        self, df: pd.DataFrame, interval: str, multiplier: int
    ) -> pd.DataFrame:
        """Resamples daily and higher-level data to the specified interval"""
        freq_map = {
            "day": f"{multiplier}D",
            "week": f"{multiplier}W-MON",
            "month": f"{multiplier}MS",
            "year": f"{multiplier}AS-JAN",
        }
        freq = freq_map[interval]

        df["date"] = pd.to_datetime(df["date"])
        df = df.set_index("date")
        resampled = df.resample(freq).agg(
            {
                "open": "first",
                "high": "max",
                "low": "min",
                "close": "last",
                "volume": "sum",
            }
        )
        return resampled.reset_index()

    def _clean_minute_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
        """Cleans and standardizes minute/hour level data, converting timestamps to UTC"""
        column_map = {
            "date": "timestamp",
            "open": "open",
            "high": "high",
            "low": "low",
            "close": "close",
            "volume": "volume",
        }

        df = raw_df.rename(columns=column_map)

        if "timestamp" in df.columns:
            df["timestamp"] = (
                pd.to_datetime(df["timestamp"])
                .dt.tz_localize("Asia/Shanghai")
                .dt.tz_convert("UTC")
            )

        return self._select_standard_columns(df)

    def _clean_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
        """Cleans and standardizes daily and higher-level data, converting timestamps to UTC"""
        column_map = {
            "date": "timestamp",
            "open": "open",
            "high": "high",
            "low": "low",
            "close": "close",
            "volume": "volume",
        }

        df = raw_df.rename(columns=column_map)

        if "timestamp" in df.columns:
            df["timestamp"] = (
                pd.to_datetime(df["timestamp"])
                .dt.tz_localize("Asia/Shanghai")
                .dt.tz_convert("UTC")
            )

        if "volume" in df.columns:
            df["volume"] = df["volume"].astype("int64")

        return self._select_standard_columns(df)

    def _select_standard_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Selects and orders the standard output columns"""
        standard_columns = [
            "timestamp",
            "open",
            "high",
            "low",
            "close",
            "volume",
        ]
        return df[[col for col in standard_columns if col in df.columns]]
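The minute, hour, and daily helpers in this file all reduce bars with the same OHLCV aggregation rule: first open, max high, min low, last close, summed volume. A self-contained pandas sketch with synthetic values (not data from the package) shows what that rule yields when daily bars are resampled to a 2-day frequency, as _resample_data does for interval="day", multiplier=2:

    import pandas as pd

    # Synthetic daily bars for four sessions; values are illustrative only.
    df = pd.DataFrame(
        {
            "date": pd.date_range("2024-01-01", periods=4, freq="D"),
            "open": [10.0, 10.2, 10.1, 10.4],
            "high": [10.3, 10.4, 10.5, 10.6],
            "low": [9.9, 10.0, 10.0, 10.2],
            "close": [10.2, 10.1, 10.4, 10.5],
            "volume": [1000, 1200, 900, 1100],
        }
    ).set_index("date")

    # Same aggregation dict as the package code above, applied at the "2D"
    # frequency that freq_map["day"] produces when multiplier is 2.
    resampled = (
        df.resample("2D")
        .agg({"open": "first", "high": "max", "low": "min", "close": "last", "volume": "sum"})
        .reset_index()
    )
    print(resampled)  # two rows, each covering two of the original sessions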