mns-common 1.5.1.8__py3-none-any.whl → 1.5.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mns-common might be problematic. Click here for more details.

@@ -0,0 +1,504 @@
1
+ import sys
2
+ import os
3
+
4
# Append a prefix of this file's absolute path to sys.path so that the
# ``mns_common`` imports further down can be resolved.
file_path = os.path.abspath(__file__)
# Cut the path 16 characters after the first occurrence of 'mns'.
# NOTE(review): presumably 16 is chosen so the slice ends at the project root
# directory - TODO confirm. ``str.index`` raises ValueError when 'mns' does not
# occur in the path.
end = file_path.index('mns') + 16
project_path = file_path[0:end]
sys.path.append(project_path)
8
+
9
+ import pandas as pd
10
+ from loguru import logger
11
+ import requests
12
+ import time
13
+ import numpy as np
14
+ import mns_common.component.proxies.proxy_common_api as proxy_common_api
15
+ import concurrent.futures
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ from threading import Lock
18
+ import mns_common.utils.data_frame_util as data_frame_util
19
+
20
# Maximum number of rows returned. Used by thread_pool_executor to work out
# how many pages to request; get_fund_etf_page_df overwrites it with the
# API's reported total whenever it fetches page 1.
max_number = 1200
# Minimum number of rows returned. A first page with more rows than this is
# treated as a complete result by get_etf_real_time_quotes(proxies).
min_number = 1000
# Number of rows per page.
page_number = 100
26
+
27
+
28
def get_etf_count(pn, proxies, page_size, time_out):
    """
    Ask the Eastmoney ETF real-time quote endpoint how many ETFs it lists.

    Source page: https://quote.eastmoney.com/center/gridlist.html#fund_etf

    :param pn: page number, sent as the ``pn`` query parameter
    :param proxies: ``requests``-style proxies mapping, or ``None`` for a
        direct connection
    :param page_size: rows per page, sent as the ``pz`` query parameter
    :param time_out: timeout handed to ``requests.get``
    :return: ``data.total`` from the JSON response, or ``0`` when the request
        or the parsing of the response fails
    :rtype: int
    """
    current_timestamp = str(int(round(time.time() * 1000, 0)))
    url = "https://88.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": str(pn),
        "pz": str(page_size),
        "po": "1",
        "np": "3",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "wbp2u": "|0|0|0|web",
        "fid": "f12",
        "fs": "b:MK0021,b:MK0022,b:MK0023,b:MK0024",
        "fields": (
            "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,"
            "f12,f13,f14,f15,f16,f17,f18,f20,f21,"
            "f23,f24,f25,f26,f22,f11,f30,f31,f32,f33,"
            "f34,f35,f38,f62,f63,f64,f65,f66,f69,"
            "f72,f75,f78,f81,f84,f87,f115,f124,f128,"
            "f136,f152,f184,f297,f402,f441"
        ),
        "_": str(current_timestamp),
    }
    try:
        if proxies is None:
            r = requests.get(url, params, timeout=time_out)
        else:
            r = requests.get(url, params, proxies=proxies, timeout=time_out)
        data_json = r.json()
        return int(data_json['data']['total'])
    except Exception as e:
        logger.error("获取ETF列表,实时行情异常:{}", e)
        # The caller compares the result with ``> 0`` and ``== 0``. Returning
        # a DataFrame here (as before) made that comparison raise
        # "truth value of a DataFrame is ambiguous", so signal failure with 0.
        return 0
69
+
70
+
71
def get_fund_etf_page_df(pn, proxies, page_size, time_out) -> pd.DataFrame:
    """
    Fetch one page of Eastmoney ETF real-time quotes as a DataFrame.

    Source page: https://quote.eastmoney.com/center/gridlist.html#fund_etf

    When ``pn == 1`` the module-level ``max_number`` is overwritten with the
    ``data.total`` value reported by the API.

    :param pn: page number, sent as the ``pn`` query parameter
    :param proxies: ``requests``-style proxies mapping, or ``None`` for a
        direct connection
    :param page_size: rows per page, sent as the ``pz`` query parameter
    :param time_out: timeout handed to ``requests.get``
    :return: one row per ETF with the Chinese column names listed in
        ``output_columns`` below; an empty DataFrame when the request or the
        parsing of the response fails
    :rtype: pandas.DataFrame
    """
    global max_number

    current_timestamp = str(int(round(time.time() * 1000, 0)))
    url = "https://88.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": str(pn),
        "pz": str(page_size),
        "po": "1",
        "np": "3",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "wbp2u": "|0|0|0|web",
        "fid": "f12",
        "fs": "b:MK0021,b:MK0022,b:MK0023,b:MK0024",
        "fields": (
            "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,"
            "f12,f13,f14,f15,f16,f17,f18,f20,f21,"
            "f23,f24,f25,f26,f22,f11,f30,f31,f32,f33,"
            "f34,f35,f38,f62,f63,f64,f65,f66,f69,"
            "f72,f75,f78,f81,f84,f87,f115,f124,f128,"
            "f136,f152,f184,f297,f402,f441"
        ),
        "_": str(current_timestamp),
    }

    # API field code -> column name used by the rest of this module.
    column_names = {
        "f26": "上市时间",
        "f12": "代码",
        "f14": "名称",
        "f2": "最新价",
        "f4": "涨跌额",
        "f3": "涨跌幅",
        "f5": "成交量",
        "f6": "成交额",
        "f7": "振幅",
        "f17": "开盘价",
        "f15": "最高价",
        "f16": "最低价",
        "f18": "昨收",
        "f8": "换手率",
        "f10": "量比",
        "f30": "现手",
        "f31": "买一",
        "f32": "卖一",
        "f33": "委比",
        "f34": "外盘",
        "f35": "内盘",
        "f62": "主力净流入-净额",
        "f184": "主力净流入-净占比",
        "f66": "超大单净流入-净额",
        "f69": "超大单净流入-净占比",
        "f72": "大单净流入-净额",
        "f75": "大单净流入-净占比",
        "f78": "中单净流入-净额",
        "f81": "中单净流入-净占比",
        "f84": "小单净流入-净额",
        "f87": "小单净流入-净占比",
        "f38": "最新份额",
        "f21": "流通市值",
        "f20": "总市值",
        "f402": "基金折价率",
        "f441": "IOPV实时估值",
        "f297": "数据日期",
        "f124": "更新时间",
        "f13": "market",
    }

    # Columns kept in the result, in output order.
    output_columns = [
        "代码",
        "名称",
        "最新价",
        "IOPV实时估值",
        "基金折价率",
        "涨跌额",
        "涨跌幅",
        "成交量",
        "成交额",
        "开盘价",
        "最高价",
        "最低价",
        "昨收",
        "振幅",
        "换手率",
        "量比",
        "委比",
        "外盘",
        "内盘",
        "主力净流入-净额",
        "主力净流入-净占比",
        "超大单净流入-净额",
        "超大单净流入-净占比",
        "大单净流入-净额",
        "大单净流入-净占比",
        "中单净流入-净额",
        "中单净流入-净占比",
        "小单净流入-净额",
        "小单净流入-净占比",
        "现手",
        "买一",
        "卖一",
        "最新份额",
        "流通市值",
        "总市值",
        "数据日期",
        "更新时间",
        "market",
        "上市时间",
    ]

    # Columns coerced to numbers; values that cannot be parsed become NaN.
    numeric_columns = (
        "最新价",
        "涨跌额",
        "涨跌幅",
        "成交量",
        "成交额",
        "开盘价",
        "最高价",
        "最低价",
        "昨收",
        "换手率",
        "量比",
        "委比",
        "外盘",
        "内盘",
        "流通市值",
        "总市值",
        "振幅",
        "现手",
        "买一",
        "卖一",
        "最新份额",
        "IOPV实时估值",
        "基金折价率",
        "主力净流入-净额",
        "主力净流入-净占比",
        "超大单净流入-净额",
        "超大单净流入-净占比",
        "大单净流入-净额",
        "大单净流入-净占比",
        "中单净流入-净额",
        "中单净流入-净占比",
        "小单净流入-净额",
        "小单净流入-净占比",
    )

    try:
        if proxies is None:
            r = requests.get(url, params, timeout=time_out)
        else:
            r = requests.get(url, params, proxies=proxies, timeout=time_out)
        data_json = r.json()
        if pn == 1:
            try:
                max_number = int(data_json['data']['total'])
            except Exception as e:
                # Log the page number; the message used to receive page_size.
                logger.error("获取第{}页ETF列表异常:{}", pn, str(e))
                return pd.DataFrame()

        temp_df = pd.DataFrame(data_json["data"]["diff"])
        temp_df = temp_df.rename(columns=column_names)
        # Explicit copy so the column assignments below write to an
        # independent frame rather than to a selection of the raw response.
        temp_df = temp_df[output_columns].copy()

        for column in numeric_columns:
            temp_df[column] = pd.to_numeric(temp_df[column], errors="coerce")

        temp_df["数据日期"] = pd.to_datetime(
            temp_df["数据日期"], format="%Y%m%d", errors="coerce"
        )
        # The update time is parsed as epoch seconds and shown in Shanghai time.
        temp_df["更新时间"] = (
            pd.to_datetime(temp_df["更新时间"], unit="s", errors="coerce")
            .dt.tz_localize("UTC")
            .dt.tz_convert("Asia/Shanghai")
        )

        return temp_df
    except Exception as e:
        logger.error("获取ETF列表,实时行情异常:{}", e)
        return pd.DataFrame()
269
+
270
+
271
def thread_pool_executor(proxies, time_out=30):
    """
    Fetch every ETF page with a small thread pool and merge the results.

    The number of pages is derived from the module-level ``max_number`` and
    ``page_number`` (rows per page).

    :param proxies: ``requests``-style proxies mapping forwarded to
        ``get_fund_etf_page_df``
    :param time_out: request timeout forwarded to ``get_fund_etf_page_df``;
        the default of 30 mirrors the value used by this file's ``__main__``
        entry point
    :return: all non-empty pages concatenated with a fresh index, or an empty
        DataFrame when no page returned data
    :rtype: pandas.DataFrame
    """
    per_page = page_number
    total_pages = (max_number + per_page - 1) // per_page  # ceiling division

    with ThreadPoolExecutor(max_workers=3) as executor:
        # get_fund_etf_page_df needs the page size and the timeout as well;
        # submitting it with only (pn, proxies) made every future raise
        # TypeError when its result was collected.
        futures = [
            executor.submit(get_fund_etf_page_df, pn, proxies, per_page, time_out)
            for pn in range(1, total_pages + 1)
        ]

        # Collect in submission order so the pages stay in page order.
        results = []
        for future in futures:
            page_df = future.result()
            if not page_df.empty:
                results.append(page_df)

    if results:
        return pd.concat(results, ignore_index=True)
    return pd.DataFrame()
298
+
299
+
300
def rename_etf(fund_etf_spot_em_df):
    """
    Convert a raw ETF quote frame to English column names and add ratios.

    The input is expected to carry the Chinese column names produced by
    ``get_fund_etf_page_df``. Columns that are not part of the selection
    below are dropped.

    Derived columns added to the result:

    * ``disk_ratio``: (outer_disk - inner_disk) / inner_disk
    * ``reference_main_inflow``: flow_mv / 1000
    * ``main_inflow_multiple``: today_main_net_inflow / reference_main_inflow
    * ``super_main_inflow_multiple``: super_large_order_net_inflow /
      reference_main_inflow
    * ``large_inflow_multiple``: large_order_net_inflow / reference_main_inflow
    * ``disk_diff_amount``: (outer_disk - inner_disk) * now_price * 100
    * ``disk_diff_amount_exchange``: disk_diff_amount / reference_main_inflow
    * ``sum_main_inflow_disk``: main_inflow_multiple + disk_diff_amount_exchange

    All ratios are rounded to 2 decimals, except ``sum_main_inflow_disk``,
    which is the plain sum of two rounded columns. NaN and +/-inf are
    replaced by 0 across the whole frame.

    :param fund_etf_spot_em_df: raw quote frame; ``None`` or an empty frame is
        accepted
    :return: the converted frame, or an empty DataFrame for ``None``/empty
        input
    :rtype: pandas.DataFrame
    """
    # Fetch failures are reported upstream as an empty DataFrame; pass that
    # through instead of raising KeyError on the column selection below.
    if fund_etf_spot_em_df is None or fund_etf_spot_em_df.empty:
        return pd.DataFrame()

    # NOTE(review): some targets do not look like literal translations
    # ("振幅" -> "pct_chg", "涨跌额" -> "range"); they are kept exactly as
    # they were because other code may rely on these names.
    english_names = {
        "上市时间": "list_date",
        "最新价": "now_price",
        "涨跌幅": "chg",
        "基金折价率": "fund_discount_rate",
        "振幅": "pct_chg",
        "涨跌额": "range",
        "成交额": "amount",
        "成交量": "volume",
        "换手率": "exchange",
        "量比": "quantity_ratio",
        "代码": "symbol",
        "名称": "name",
        "最高价": "high",
        "最低价": "low",
        "开盘价": "open",
        "昨收": "yesterday_price",
        "总市值": "total_mv",
        "流通市值": "flow_mv",
        "委比": "wei_bi",
        "外盘": "outer_disk",
        "内盘": "inner_disk",
        "主力净流入-净额": "today_main_net_inflow",
        "超大单净流入-净额": "super_large_order_net_inflow",
        "超大单净流入-净占比": "super_large_order_net_inflow_ratio",
        "大单净流入-净额": "large_order_net_inflow",
        "主力净流入-净占比": "today_main_net_inflow_ratio",
        "买一": "buy_1_num",
        "卖一": "sell_1_num",
        "最新份额": "latest_share",
        "数据日期": "data_time",
        "更新时间": "update_time",
    }
    kept_columns = [
        "now_price",
        "chg",
        "fund_discount_rate",
        "pct_chg",
        "range",
        "amount",
        "volume",
        "exchange",
        "quantity_ratio",
        "symbol",
        "name",
        "high",
        "low",
        "open",
        "yesterday_price",
        "total_mv",
        "flow_mv",
        "wei_bi",
        "outer_disk",
        "inner_disk",
        "today_main_net_inflow",
        "super_large_order_net_inflow",
        "super_large_order_net_inflow_ratio",
        "large_order_net_inflow",
        "today_main_net_inflow_ratio",
        "buy_1_num",
        "sell_1_num",
        "latest_share",
        "data_time",
        "update_time",
        "market",
        'list_date',
    ]

    # Work on an explicit copy so the new columns are written to an
    # independent frame and the caller's frame is left untouched.
    df = fund_etf_spot_em_df.rename(columns=english_names)[kept_columns].copy()

    disk_diff = df['outer_disk'] - df['inner_disk']

    df['disk_ratio'] = (disk_diff / df['inner_disk']).round(2)
    df['reference_main_inflow'] = (df['flow_mv'] * (1 / 1000)).round(2)

    reference = df['reference_main_inflow']
    df['main_inflow_multiple'] = (df['today_main_net_inflow'] / reference).round(2)
    df['super_main_inflow_multiple'] = (
        df['super_large_order_net_inflow'] / reference
    ).round(2)
    df['large_inflow_multiple'] = (df['large_order_net_inflow'] / reference).round(2)

    df['disk_diff_amount'] = (disk_diff * df["now_price"] * 100).round(2)
    df['disk_diff_amount_exchange'] = (df['disk_diff_amount'] / reference).round(2)
    df['sum_main_inflow_disk'] = (
        df['main_inflow_multiple'] + df['disk_diff_amount_exchange']
    )

    # Divisions by zero above yield NaN or +/-inf; report all of them as 0.
    df = df.fillna(0)
    df = df.replace([np.inf, -np.inf], 0)
    return df
400
+
401
+
402
def get_etf_real_time_quotes(proxies, time_out=30):
    """
    Fetch ETF real-time quotes through a single proxy configuration.

    NOTE(review): a second ``get_etf_real_time_quotes(time_out,
    pages_per_thread)`` is defined further down in this file and replaces this
    definition when the module is loaded, so this version cannot be reached by
    name. Consider renaming or removing one of the two.

    :param proxies: ``requests``-style proxies mapping forwarded to the page
        fetchers
    :param time_out: request timeout for the first-page fetch; the default of
        30 mirrors the value used by this file's ``__main__`` entry point
    :return: renamed quotes with duplicate ``symbol`` rows removed (last
        occurrence kept), or an empty DataFrame when nothing was fetched
    :rtype: pandas.DataFrame
    """
    # Fetch the first page. get_fund_etf_page_df also needs the page size and
    # the timeout; calling it with only (1, proxies) raised TypeError.
    page_one_df = get_fund_etf_page_df(1, proxies, page_number, time_out)

    # A first page holding more than min_number rows is taken as the full
    # result; otherwise every page is fetched through the thread pool.
    if page_one_df.shape[0] > min_number:
        page_df = page_one_df
    else:
        page_df = thread_pool_executor(proxies)

    # Nothing fetched: return early, because an empty frame has no 'symbol'
    # column to de-duplicate on.
    if page_df.empty:
        return pd.DataFrame()

    page_df = rename_etf(page_df)
    page_df.drop_duplicates('symbol', keep='last', inplace=True)
    return page_df
415
+
416
+
417
def repeated_acquisition_ask_etf_async(time_out, max_number, num_threads, pages_per_thread):
    """
    Fetch all ETF pages with worker threads, each using its own proxy.

    Worker ``i`` (1-based) handles pages ``(i - 1) * pages_per_thread + 1``
    through ``i * pages_per_thread``, capped at the total page count. A worker
    requests a new proxy and retries the same page whenever a page comes back
    empty or raises.

    NOTE(review): the retry loop has no upper bound, so a page that never
    returns data keeps its worker running indefinitely. Pages beyond
    ``num_threads * pages_per_thread`` are not fetched.

    :param time_out: request timeout forwarded to ``get_fund_etf_page_df``
    :param max_number: total number of rows expected, used to derive the page
        count from the module-level ``page_number``
    :param num_threads: number of workers, also used as the pool size
    :param pages_per_thread: number of consecutive pages per worker
    :return: the fetched rows passed through ``rename_etf``, or an empty
        DataFrame when there is nothing to fetch or nothing was fetched
    :rtype: pandas.DataFrame
    """
    per_page = page_number
    total_pages = (max_number + per_page - 1) // per_page  # ceiling division

    # (first page, last page, worker id) for every worker that has work to do.
    page_ranges = []
    for i in range(num_threads):
        start_page = i * pages_per_thread + 1
        if start_page > total_pages:
            break
        page_ranges.append((start_page, (i + 1) * pages_per_thread, i + 1))

    # No pages to fetch: avoid building a zero-sized pool and avoid handing an
    # empty frame to rename_etf.
    if not page_ranges:
        return pd.DataFrame()

    collected = []  # one frame per finished worker, in completion order
    df_lock = Lock()  # guards ``collected``

    def process_page_range(start_page, end_page, thread_id):
        """Fetch pages start_page..end_page and return the number of rows."""
        page_frames = []
        current_page = start_page
        last_page = min(end_page, total_pages)
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)

        while current_page <= last_page:
            proxies = {"https": proxy_ip, "http": proxy_ip}
            try:
                page_df = get_fund_etf_page_df(current_page, proxies, page_number, time_out)
                if data_frame_util.is_not_empty(page_df):
                    page_frames.append(page_df)
                    logger.info("线程{}获取页面数据成功: {}", thread_id, current_page)
                    current_page += 1
                else:
                    # Empty page: back off briefly and retry with a new proxy.
                    time.sleep(0.2)
                    proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
                    logger.info("线程{}获取页面数据失败: {}", thread_id, current_page)
            except Exception as e:
                # Exception rather than BaseException, so SystemExit and
                # KeyboardInterrupt are not swallowed by the retry loop.
                time.sleep(1)
                proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
                logger.error("线程{}处理页面{}时发生错误: {}", thread_id, current_page, e)

        # Concatenate once per worker instead of once per page.
        local_df = pd.concat(page_frames) if page_frames else pd.DataFrame()
        with df_lock:
            collected.append(local_df)
        return len(local_df)

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [
            executor.submit(process_page_range, start, end, tid)
            for start, end, tid in page_ranges
        ]

        # Wait for every worker; a worker that raised is logged, not re-raised.
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                logger.error("线程执行出错: {}", e)

    frames = [frame for frame in collected if not frame.empty]
    if not frames:
        return pd.DataFrame()
    return rename_etf(pd.concat(frames))
480
+
481
+
482
def get_etf_real_time_quotes(time_out, pages_per_thread):
    """
    Fetch real-time quotes for all ETFs through rotating proxies.

    The total ETF count is requested first, with up to 3 attempts and a fresh
    proxy for each attempt. The pages are then fetched concurrently by
    ``repeated_acquisition_ask_etf_async``.

    :param time_out: request timeout forwarded to every request
    :param pages_per_thread: number of pages handled by each worker thread
    :return: renamed ETF quotes, or an empty DataFrame when the total count
        could not be obtained
    :rtype: pandas.DataFrame
    """
    total_count = 0
    for _ in range(3):
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
        proxies = {"https": proxy_ip,
                   "http": proxy_ip}

        count = get_etf_count(1, proxies, 20, time_out)
        # Anything other than a positive int (for example a failure sentinel)
        # counts as a failed attempt. Comparing a non-int result with ``> 0``
        # directly could raise instead of triggering a retry.
        if isinstance(count, int) and count > 0:
            total_count = count
            break

    if total_count == 0:
        return pd.DataFrame()

    total_pages = (total_count + page_number - 1) // page_number  # ceiling division

    # Exactly enough workers to cover every page (ceiling division).
    num_threads = (total_pages + pages_per_thread - 1) // pages_per_thread
    return repeated_acquisition_ask_etf_async(time_out, total_count, num_threads, pages_per_thread)
500
+
501
+
502
if __name__ == '__main__':
    # Manual smoke run: 30 is the request timeout, 6 the pages per thread.
    print(get_etf_real_time_quotes(30, 6))