akshare-one 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- akshare_one/__init__.py +2 -3
- akshare_one/financial.py +7 -4
- akshare_one/insider.py +4 -5
- akshare_one/modules/financial/base.py +41 -0
- akshare_one/modules/financial/factory.py +44 -0
- akshare_one/{adapters → modules/financial}/sina.py +22 -203
- akshare_one/modules/historical/base.py +64 -0
- akshare_one/modules/historical/eastmoney.py +242 -0
- akshare_one/modules/historical/factory.py +46 -0
- akshare_one/modules/historical/sina.py +219 -0
- akshare_one/modules/insider/base.py +78 -0
- akshare_one/modules/insider/factory.py +44 -0
- akshare_one/{adapters → modules/insider}/xueqiu.py +17 -76
- akshare_one/modules/news/base.py +51 -0
- akshare_one/modules/news/eastmoney.py +48 -0
- akshare_one/modules/news/factory.py +44 -0
- akshare_one/modules/realtime/base.py +68 -0
- akshare_one/modules/realtime/eastmoney.py +58 -0
- akshare_one/modules/realtime/factory.py +46 -0
- akshare_one/modules/realtime/xueqiu.py +61 -0
- akshare_one/modules/utils.py +10 -0
- akshare_one/news.py +3 -4
- akshare_one/stock.py +16 -30
- {akshare_one-0.1.3.dist-info → akshare_one-0.2.0.dist-info}/METADATA +2 -2
- akshare_one-0.2.0.dist-info/RECORD +29 -0
- {akshare_one-0.1.3.dist-info → akshare_one-0.2.0.dist-info}/WHEEL +1 -1
- akshare_one/adapters/__init__.py +0 -7
- akshare_one/adapters/eastmoney.py +0 -353
- akshare_one-0.1.3.dist-info/RECORD +0 -15
- /akshare_one/{adapters/cache → modules}/cache.py +0 -0
- {akshare_one-0.1.3.dist-info → akshare_one-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {akshare_one-0.1.3.dist-info → akshare_one-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,242 @@
|
|
1
|
+
from cachetools import cached
|
2
|
+
from .base import HistoricalDataProvider, validate_hist_data
|
3
|
+
import akshare as ak
|
4
|
+
import pandas as pd
|
5
|
+
from ..cache import CACHE_CONFIG
|
6
|
+
|
7
|
+
|
8
|
+
class EastMoneyHistorical(HistoricalDataProvider):
    """Adapter for EastMoney historical stock data API."""

    @validate_hist_data
    @cached(
        cache=CACHE_CONFIG["hist_data_cache"],
        # The key must include the requested date range: two requests that
        # differ only in start/end date previously shared one cache entry,
        # so the second caller silently received the first caller's data.
        key=lambda self: (
            "eastmoney_hist_"
            f"{self.symbol}_{self.interval}_{self.interval_multiplier}_"
            f"{self.adjust}_{self.start_date}_{self.end_date}"
        ),
    )
    def get_hist_data(self) -> pd.DataFrame:
        """Fetches EastMoney historical market data.

        Returns:
            pd.DataFrame:
                - timestamp (UTC, timezone-aware)
                - open
                - high
                - low
                - close
                - volume

        Raises:
            ValueError: On invalid interval parameters or upstream failure.
        """
        self.interval = self.interval.lower()  # idempotent normalization
        self._validate_interval_params(self.interval, self.interval_multiplier)

        try:
            if self.interval in ("minute", "hour"):
                df = self._get_intraday_data()
            else:
                df = self._get_daily_plus_data()
            return df
        except Exception as e:
            raise ValueError(f"Failed to fetch historical data: {str(e)}") from e

    def _get_intraday_data(self) -> pd.DataFrame:
        """Fetches intraday (minute/hour) data and resamples it."""
        # Fill in regular A-share session bounds when only a date was given.
        start_date = self._ensure_time_format(self.start_date, "09:30:00")
        end_date = self._ensure_time_format(self.end_date, "15:00:00")

        period = "1" if self.interval == "minute" else "60"
        raw_df = ak.stock_zh_a_hist_min_em(
            symbol=self.symbol,
            period=period,
            start_date=start_date,
            end_date=end_date,
            adjust=self._map_adjust_param(self.adjust),
        )

        freq = (
            f"{self.interval_multiplier}min"
            if self.interval == "minute"
            else f"{self.interval_multiplier}h"
        )
        resampled = self._resample_intraday_data(raw_df, freq)
        return self._clean_minute_data(resampled, str(self.interval_multiplier))

    def _get_daily_plus_data(self) -> pd.DataFrame:
        """Fetches daily and higher-level data (day/week/month/year)."""
        start_date = self._convert_date_format(self.start_date)
        end_date = self._convert_date_format(self.end_date)

        # "year" has no native upstream period; yearly bars are aggregated
        # from monthly data below.
        period_map = {
            "day": "daily",
            "week": "weekly",
            "month": "monthly",
            "year": "monthly",
        }
        period = period_map[self.interval]

        raw_df = ak.stock_zh_a_hist(
            symbol=self.symbol,
            period=period,
            start_date=start_date,
            end_date=end_date,
            adjust=self._map_adjust_param(self.adjust),
        )

        # Work on locals instead of mutating self.interval_multiplier: the
        # old in-place "*= 12" leaked across calls and changed the cache key
        # of every subsequent request made on this instance.
        interval = self.interval
        multiplier = self.interval_multiplier
        if interval == "year":
            # N years of monthly bars == 12*N months. Resampling the monthly
            # data with "{12*N}MS" yields yearly aggregates; the old code
            # resampled with "{12*N}AS-JAN", i.e. buckets of 12*N YEARS.
            interval = "month"
            multiplier *= 12

        if multiplier > 1:
            raw_df = self._resample_data(raw_df, interval, multiplier)

        return self._clean_data(raw_df)

    def _validate_interval_params(self, interval: str, multiplier: int) -> None:
        """Validates interval / interval_multiplier combinations.

        Raises:
            ValueError: If the interval is unsupported or the multiplier is
                below 1 for sub-daily intervals.
        """
        if interval not in self.get_supported_intervals():
            raise ValueError(f"Unsupported interval parameter: {interval}")

        if interval in ("minute", "hour") and multiplier < 1:
            raise ValueError(f"interval_multiplier for {interval} level must be ≥ 1")

    def _ensure_time_format(self, date_str: str, default_time: str) -> str:
        """Appends *default_time* when *date_str* lacks a time component."""
        if " " not in date_str:
            return f"{date_str} {default_time}"
        return date_str

    def _convert_date_format(self, date_str: str) -> str:
        """Converts YYYY-MM-DD to the YYYYMMDD form the upstream API expects."""
        return date_str.replace("-", "") if "-" in date_str else date_str

    def _map_adjust_param(self, adjust: str) -> str:
        """Maps the 'none' sentinel to the empty string the upstream API uses."""
        return adjust if adjust != "none" else ""

    def _resample_intraday_data(self, df: pd.DataFrame, freq: str) -> pd.DataFrame:
        """Resamples raw intraday rows (Chinese column names) to *freq*."""
        df["时间"] = pd.to_datetime(df["时间"])
        df = df.set_index("时间")
        resampled = df.resample(freq).agg(
            {
                "开盘": "first",
                "最高": "max",
                "最低": "min",
                "收盘": "last",
                "成交量": "sum",
                "成交额": "sum",
            }
        )
        # NOTE(review): bins outside trading hours come back as all-NaN rows
        # and are currently kept — confirm whether they should be dropped.
        return resampled.reset_index()

    def _resample_data(
        self, df: pd.DataFrame, interval: str, multiplier: int
    ) -> pd.DataFrame:
        """Resamples daily-or-coarser rows (Chinese column names)."""
        freq_map = {
            "day": f"{multiplier}D",
            "week": f"{multiplier}W-MON",
            "month": f"{multiplier}MS",
            # Kept for direct callers only; get_hist_data routes "year"
            # through "month" (12 monthly bars per year) instead.
            "year": f"{multiplier}AS-JAN",
        }
        freq = freq_map[interval]

        df["日期"] = pd.to_datetime(df["日期"])
        df = df.set_index("日期")
        resampled = df.resample(freq).agg(
            {
                "开盘": "first",
                "最高": "max",
                "最低": "min",
                "收盘": "last",
                "成交量": "sum",
            }
        )
        return resampled.reset_index()

    def _clean_minute_data(self, raw_df: pd.DataFrame, period: str) -> pd.DataFrame:
        """Standardizes minute/hour columns and converts timestamps to UTC."""
        column_map = {
            "1": {
                "时间": "timestamp",
                "开盘": "open",
                "收盘": "close",
                "最高": "high",
                "最低": "low",
                "成交量": "volume",
                "成交额": "amount",
                "均价": "vwap",
            },
            "default": {
                "时间": "timestamp",
                "开盘": "open",
                "收盘": "close",
                "最高": "high",
                "最低": "low",
                "涨跌幅": "pct_change",
                "涨跌额": "change",
                "成交量": "volume",
                "成交额": "amount",
                "振幅": "amplitude",
                "换手率": "turnover",
            },
        }

        mapping = column_map["1"] if period == "1" else column_map["default"]
        df = raw_df.rename(columns=mapping)

        if "timestamp" in df.columns:
            # EastMoney timestamps are exchange-local (Asia/Shanghai).
            df["timestamp"] = (
                pd.to_datetime(df["timestamp"])
                .dt.tz_localize("Asia/Shanghai")
                .dt.tz_convert("UTC")
            )

        return self._select_standard_columns(df)

    def _clean_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
        """Standardizes daily-plus columns and converts timestamps to UTC."""
        column_map = {
            "日期": "timestamp",
            "开盘": "open",
            "收盘": "close",
            "最高": "high",
            "最低": "low",
            "成交量": "volume",
        }

        available_columns = {
            src: target for src, target in column_map.items() if src in raw_df.columns
        }

        if not available_columns:
            raise ValueError("Expected columns not found in raw data")

        df = raw_df.rename(columns=available_columns)

        if "timestamp" in df.columns:
            df["timestamp"] = (
                pd.to_datetime(df["timestamp"])
                .dt.tz_localize("Asia/Shanghai")
                .dt.tz_convert("UTC")
            )

        if "volume" in df.columns:
            df["volume"] = df["volume"].astype("int64")

        return self._select_standard_columns(df)

    def _select_standard_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Selects and orders the standard output columns."""
        standard_columns = [
            "timestamp",
            "open",
            "high",
            "low",
            "close",
            "volume",
        ]
        return df[[col for col in standard_columns if col in df.columns]]
|
@@ -0,0 +1,46 @@
|
|
1
|
+
from .base import HistoricalDataProvider
|
2
|
+
from .eastmoney import EastMoneyHistorical
|
3
|
+
from .sina import SinaHistorical
|
4
|
+
|
5
|
+
|
6
|
+
class HistoricalDataFactory:
    """Factory for constructing historical data providers by name."""

    _providers = {
        "eastmoney": EastMoneyHistorical,
        "sina": SinaHistorical,
    }

    @classmethod
    def get_provider(cls, provider_name: str, **kwargs) -> HistoricalDataProvider:
        """Instantiate the provider registered under *provider_name*.

        Args:
            provider_name: Case-insensitive provider name (e.g. 'eastmoney')
            **kwargs: Forwarded to the provider's constructor

        Returns:
            HistoricalDataProvider: A ready-to-use provider instance

        Raises:
            ValueError: If no provider is registered under that name
        """
        try:
            provider_class = cls._providers[provider_name.lower()]
        except KeyError:
            raise ValueError(
                f"Unknown historical data provider: {provider_name}"
            ) from None

        return provider_class(**kwargs)

    @classmethod
    def register_provider(cls, name: str, provider_class: type):
        """Register *provider_class* under the lower-cased *name*.

        Args:
            name: Name to associate with this provider
            provider_class: The provider class to register
        """
        cls._providers[name.lower()] = provider_class
|
@@ -0,0 +1,219 @@
|
|
1
|
+
from cachetools import cached
|
2
|
+
from .base import HistoricalDataProvider, validate_hist_data
|
3
|
+
import akshare as ak
|
4
|
+
import pandas as pd
|
5
|
+
from ..cache import CACHE_CONFIG
|
6
|
+
|
7
|
+
|
8
|
+
class SinaHistorical(HistoricalDataProvider):
    """Adapter for Sina historical stock data API."""

    @validate_hist_data
    @cached(
        cache=CACHE_CONFIG["hist_data_cache"],
        # The key must include the requested date range: two requests that
        # differ only in start/end date previously shared one cache entry,
        # so the second caller silently received the first caller's data.
        key=lambda self: (
            "sina_hist_"
            f"{self.symbol}_{self.interval}_{self.interval_multiplier}_"
            f"{self.adjust}_{self.start_date}_{self.end_date}"
        ),
    )
    def get_hist_data(self) -> pd.DataFrame:
        """Fetches Sina historical market data.

        Returns:
            pd.DataFrame:
                - timestamp (UTC, timezone-aware)
                - open
                - high
                - low
                - close
                - volume

        Raises:
            ValueError: On invalid interval parameters or upstream failure.
        """
        self.interval = self.interval.lower()  # idempotent normalization
        self._validate_interval_params(self.interval, self.interval_multiplier)

        try:
            stock = self._normalize_symbol(self.symbol)

            if self.interval == "minute":
                df = self._get_minute_data(stock)
            elif self.interval == "hour":
                df = self._get_hour_data(stock)
            else:
                df = self._get_daily_plus_data(stock)

            return df
        except Exception as e:
            raise ValueError(f"Failed to fetch historical data: {str(e)}") from e

    @staticmethod
    def _normalize_symbol(symbol: str) -> str:
        """Prefixes a bare symbol with its exchange code.

        The old behavior prefixed every bare symbol with "sh", which
        mislabels Shenzhen (0xxxxx/3xxxxx) and Beijing (4xxxxx/8xxxxx)
        listings; the prefix is now derived from the leading digit.
        Symbols that already carry a prefix pass through unchanged.
        """
        if symbol.startswith(("sh", "sz", "bj")):
            return symbol
        if symbol.startswith(("0", "3")):
            return f"sz{symbol}"
        if symbol.startswith(("4", "8")):
            return f"bj{symbol}"
        return f"sh{symbol}"

    def _get_minute_data(self, stock: str) -> pd.DataFrame:
        """Fetches minute level data."""
        return self._fetch_intraday(stock, "1", f"{self.interval_multiplier}min")

    def _get_hour_data(self, stock: str) -> pd.DataFrame:
        """Fetches hour level data."""
        # Multiplier validity is already enforced by _validate_interval_params.
        return self._fetch_intraday(stock, "60", f"{self.interval_multiplier}h")

    def _fetch_intraday(self, stock: str, period: str, freq: str) -> pd.DataFrame:
        """Shared minute/hour fetch-and-resample path (was duplicated code)."""
        raw_df = ak.stock_zh_a_minute(
            symbol=stock,
            period=period,
            adjust=self._map_adjust_param(self.adjust),
        )
        raw_df = raw_df.rename(columns={"day": "date"})
        raw_df["date"] = pd.to_datetime(raw_df["date"])
        raw_df = raw_df.set_index("date")
        raw_df = (
            raw_df.resample(freq)
            .agg(
                {
                    "open": "first",
                    "high": "max",
                    "low": "min",
                    "close": "last",
                    "volume": "sum",
                }
            )
            .reset_index()
        )
        return self._clean_minute_data(raw_df)

    def _get_daily_plus_data(self, stock: str) -> pd.DataFrame:
        """Fetches daily and higher-level data (day/week/month/year)."""
        start_date = self._convert_date_format(self.start_date)
        end_date = self._convert_date_format(self.end_date)

        raw_df = ak.stock_zh_a_daily(
            symbol=stock,
            start_date=start_date,
            end_date=end_date,
            adjust=self._map_adjust_param(self.adjust),
        )

        # The upstream endpoint only serves daily bars, so any non-"day"
        # interval must always be resampled — the old "multiplier > 1" check
        # silently returned daily bars for e.g. interval="week", multiplier=1.
        if self.interval != "day" or self.interval_multiplier > 1:
            raw_df = self._resample_data(
                raw_df, self.interval, self.interval_multiplier
            )

        return self._clean_data(raw_df)

    def _validate_interval_params(self, interval: str, multiplier: int) -> None:
        """Validates interval / interval_multiplier combinations.

        Raises:
            ValueError: If the interval is unsupported or the multiplier is
                below 1 for sub-daily intervals.
        """
        if interval not in self.get_supported_intervals():
            raise ValueError(f"Unsupported interval parameter: {interval}")

        if interval in ("minute", "hour") and multiplier < 1:
            raise ValueError(f"interval_multiplier for {interval} level must be ≥ 1")

    def _convert_date_format(self, date_str: str) -> str:
        """Converts YYYY-MM-DD to the YYYYMMDD form the upstream API expects."""
        return date_str.replace("-", "") if "-" in date_str else date_str

    def _map_adjust_param(self, adjust: str) -> str:
        """Maps the 'none' sentinel to the empty string the upstream API uses."""
        return adjust if adjust != "none" else ""

    def _resample_data(
        self, df: pd.DataFrame, interval: str, multiplier: int
    ) -> pd.DataFrame:
        """Resamples daily rows to the requested coarser interval."""
        freq_map = {
            "day": f"{multiplier}D",
            "week": f"{multiplier}W-MON",
            "month": f"{multiplier}MS",
            "year": f"{multiplier}AS-JAN",
        }
        freq = freq_map[interval]

        df["date"] = pd.to_datetime(df["date"])
        df = df.set_index("date")
        resampled = df.resample(freq).agg(
            {
                "open": "first",
                "high": "max",
                "low": "min",
                "close": "last",
                "volume": "sum",
            }
        )
        return resampled.reset_index()

    def _clean_minute_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
        """Standardizes minute/hour columns and converts timestamps to UTC."""
        column_map = {
            "date": "timestamp",
            "open": "open",
            "high": "high",
            "low": "low",
            "close": "close",
            "volume": "volume",
        }

        df = raw_df.rename(columns=column_map)

        if "timestamp" in df.columns:
            # Sina timestamps are exchange-local (Asia/Shanghai).
            df["timestamp"] = (
                pd.to_datetime(df["timestamp"])
                .dt.tz_localize("Asia/Shanghai")
                .dt.tz_convert("UTC")
            )

        return self._select_standard_columns(df)

    def _clean_data(self, raw_df: pd.DataFrame) -> pd.DataFrame:
        """Standardizes daily-plus columns and converts timestamps to UTC."""
        column_map = {
            "date": "timestamp",
            "open": "open",
            "high": "high",
            "low": "low",
            "close": "close",
            "volume": "volume",
        }

        df = raw_df.rename(columns=column_map)

        if "timestamp" in df.columns:
            df["timestamp"] = (
                pd.to_datetime(df["timestamp"])
                .dt.tz_localize("Asia/Shanghai")
                .dt.tz_convert("UTC")
            )

        if "volume" in df.columns:
            df["volume"] = df["volume"].astype("int64")

        return self._select_standard_columns(df)

    def _select_standard_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Selects and orders the standard output columns."""
        standard_columns = [
            "timestamp",
            "open",
            "high",
            "low",
            "close",
            "volume",
        ]
        return df[[col for col in standard_columns if col in df.columns]]
|
@@ -0,0 +1,78 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
import pandas as pd
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
|
6
|
+
def validate_insider_data(func):
    """Decorator validating insider-trading data returned by data providers.

    Checks that the wrapped callable returns a pandas DataFrame carrying the
    required columns, that ``transaction_date`` is timezone-aware UTC, and
    that the numeric columns are actually numeric.

    Raises:
        ValueError: If any validation fails on the returned data.
    """
    from functools import wraps  # local import keeps the module import block untouched

    @wraps(func)  # preserve the wrapped function's name/docstring for callers
    def wrapper(*args, **kwargs):
        df = func(*args, **kwargs)

        if not isinstance(df, pd.DataFrame):
            raise ValueError("Returned data must be a pandas DataFrame")

        # Required fields for insider trading data
        required_fields = {
            "symbol",
            "issuer",
            "name",
            "transaction_date",
            "transaction_shares",
            "transaction_price_per_share",
        }
        if not required_fields.issubset(df.columns):
            missing = required_fields - set(df.columns)
            raise ValueError(f"Missing required fields: {missing}")

        # transaction_date is guaranteed present here (it is a required
        # field), so validate it unconditionally.
        if not pd.api.types.is_datetime64_any_dtype(df["transaction_date"]):
            raise ValueError("transaction_date must be datetime64 dtype")
        tz = df["transaction_date"].dt.tz
        if tz is None or str(tz) != "UTC":
            raise ValueError("transaction_date must be in UTC timezone")

        # Validate numeric fields (only those actually present)
        numeric_fields = {
            "transaction_shares",
            "transaction_price_per_share",
            "transaction_value",
            "shares_owned_before_transaction",
            "shares_owned_after_transaction",
        }
        for field in numeric_fields & set(df.columns):
            if not pd.api.types.is_numeric_dtype(df[field]):
                raise ValueError(f"{field} must be numeric")

        return df

    return wrapper
|
53
|
+
|
54
|
+
|
55
|
+
class InsiderDataProvider(ABC):
    """Abstract base class for insider-trading data providers."""

    def __init__(self, symbol: Optional[str] = None) -> None:
        self.symbol = symbol

    @abstractmethod
    def get_inner_trade_data(self, symbol: Optional[str] = None) -> pd.DataFrame:
        """Fetches insider trade data.

        Returns:
            pd.DataFrame:
                - symbol: stock code
                - issuer: stock name
                - name: person whose holdings changed
                - title: officer/director title
                - transaction_date: change date (UTC timezone)
                - transaction_shares: number of shares changed
                - transaction_price_per_share: average transaction price
                - shares_owned_after_transaction: shares held after the change
                - relationship: relationship to the officer/director
                - is_board_director: whether the person is a board member
                - transaction_value: transaction amount (shares * avg price)
                - shares_owned_before_transaction: shares held before the change
        """
|
@@ -0,0 +1,44 @@
|
|
1
|
+
from .xueqiu import XueQiuInsider
|
2
|
+
from .base import InsiderDataProvider
|
3
|
+
|
4
|
+
|
5
|
+
class InsiderDataFactory:
    """Factory for constructing insider data providers by name."""

    _providers = {
        "xueqiu": XueQiuInsider,
    }

    @classmethod
    def get_provider(cls, provider_name: str, **kwargs) -> InsiderDataProvider:
        """Instantiate the provider registered under *provider_name*.

        Args:
            provider_name: Case-insensitive provider name (e.g. 'xueqiu')
            **kwargs: Forwarded to the provider's constructor

        Returns:
            InsiderDataProvider: A ready-to-use provider instance

        Raises:
            ValueError: If no provider is registered under that name
        """
        try:
            provider_class = cls._providers[provider_name.lower()]
        except KeyError:
            raise ValueError(
                f"Unknown insider data provider: {provider_name}"
            ) from None

        return provider_class(**kwargs)

    @classmethod
    def register_provider(cls, name: str, provider_class: type):
        """Register *provider_class* under the lower-cased *name*.

        Args:
            name: Name to associate with this provider
            provider_class: The provider class to register
        """
        cls._providers[name.lower()] = provider_class
|