mns-common 1.5.1.8__py3-none-any.whl → 1.5.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mns-common might be problematic. Click here for more details.

@@ -0,0 +1,469 @@
1
+ import os
2
+ import sys
3
+
4
+ file_path = os.path.abspath(__file__)
5
+ end = file_path.index('mns') + 16
6
+ project_path = file_path[0:end]
7
+ sys.path.append(project_path)
8
+ import pandas as pd
9
+ from loguru import logger
10
+ import requests
11
+ import time
12
+ import numpy as np
13
+ import mns_common.component.proxies.proxy_common_api as proxy_common_api
14
+ import concurrent.futures
15
+ from concurrent.futures import ThreadPoolExecutor
16
+ from threading import Lock
17
+ import mns_common.utils.data_frame_util as data_frame_util
18
+ import json
19
+ import mns_common.component.cookie.cookie_info_service as cookie_info_service
20
+
21
+ # Expected maximum row count; hk_real_time_quotes_page_df overwrites it with the "total" parsed from page 1
22
+ max_number = 4500
23
+ # A first page holding more rows than this is returned as the complete listing (see get_hk_real_time_quotes)
24
+ min_number = 4400
25
+ # Rows per page used when computing page counts; also sent as the page size for Stock Connect requests
26
+ page_number = 100
27
+ # Field codes requested from the quote-list endpoint; hk_real_time_quotes_page_df renames them to readable columns
28
+ fields = ("f352,f2,f3,f5,f6,f8,f10,f11,f22,f12,f14,f15,f16,f17,f18,f20,f21,f26,"
29
+ "f33,f34,f35,f62,f66,f69,f72,f100,f184,f211,f212")
30
+
31
+
32
def hk_real_time_quotes_page_df(cookie, pn, proxies, time_out=None):
    """Download one page of Hong Kong real-time quotes and return it as a DataFrame.

    The JSONP text returned by the Eastmoney quote-list endpoint is sliced by
    substring search to extract the ``"diff"`` array, which is loaded into a
    DataFrame, renamed to readable column names, cleaned and extended with
    three derived columns (``large_order_net_inflow_ratio``, ``disk_ratio``
    and ``disk_diff_amount``).

    Side effect: when ``pn == 1`` the module-level ``max_number`` is replaced
    by the ``"total"`` value found in the response.

    :param cookie: value sent as the ``Cookie`` request header.
    :param pn: page number placed in the ``pn`` query parameter.
    :param proxies: ``requests`` proxies mapping, or ``None`` to omit it.
    :param time_out: optional timeout forwarded to ``requests.get``. The
        default ``None`` keeps the previous behaviour (no timeout), so
        existing three-argument callers are unaffected.
    :return: the cleaned quotes, or an empty DataFrame when anything fails
        (the failure is logged, never raised).
    """
    global max_number

    # Columns in which the '-' placeholder is replaced by 0. The order is kept
    # from the original code so that a missing column fails at the same point.
    placeholder_columns = (
        'buy_1_num', 'sell_1_num', 'up_speed_05', 'up_speed', 'average_price', 'wei_bi',
        'yesterday_price', 'now_price', 'chg', 'volume', 'amount', 'exchange',
        'quantity_ratio', 'high', 'low', 'open', 'total_mv', 'flow_mv', 'inner_disk',
        'outer_disk', 'today_main_net_inflow_ratio', 'today_main_net_inflow',
        'super_large_order_net_inflow', 'super_large_order_net_inflow_ratio',
        'large_order_net_inflow',
    )
    # Columns coerced to numbers; values that cannot be parsed become NaN.
    numeric_columns = (
        'list_date', 'wei_bi', 'average_price', 'yesterday_price', 'now_price', 'chg',
        'volume', 'amount', 'exchange', 'quantity_ratio', 'high', 'low', 'open',
        'total_mv', 'flow_mv', 'outer_disk', 'inner_disk', 'today_main_net_inflow',
        'super_large_order_net_inflow', 'super_large_order_net_inflow_ratio',
        'large_order_net_inflow',
    )

    try:
        headers = {
            'Cookie': cookie
        }

        # Millisecond timestamp, used both in the JSONP callback name and as cache-buster.
        current_timestamp = str(int(round(time.time() * 1000, 0)))

        url_new = ('https://61.push2.eastmoney.com/api/qt/clist/get?cb=jQuery112409497467688484127_'
                   + current_timestamp
                   + '&pn=' + str(pn)
                   + '&pz=50000'
                     '&po=1'
                     '&np=3'
                     '&ut=bd1d9ddb04089700cf9c27f6f7426281'
                     '&fltt=2'
                     '&invt=2'
                     '&wbp2u=4253366368931142|0|1|0|web'
                     '&fid=f12'
                     '&fs=m:116+t:3,m:116+t:4,m:116+t:1,m:116+t:2'
                     '&fields=' + fields
                   + '&_=' + current_timestamp)

        request_options = {'headers': headers, 'timeout': time_out}
        if proxies is not None:
            request_options['proxies'] = proxies
        r = requests.get(url_new, **request_options)
        result = r.content.decode("utf-8")

        if pn == 1:
            try:
                # The total sits between '"total":' and the separator preceding '"diff"'.
                begin_index_total = result.index('"total":')
                end_index_total = result.index('"diff"')
                max_number = int(result[begin_index_total + 8:end_index_total - 1])
            except Exception as e:
                logger.error(f"获取第{pn}页港股列表异常: {e}")
                return pd.DataFrame()

        # Cut out the JSON array that follows '"diff":' and ends at the first '}]'.
        start_index = result.index('"diff"')
        end_index = result.index('}]}')
        data_json = json.loads(result[start_index + 7:end_index + 2])

        temp_df = pd.DataFrame(data_json)
        temp_df = temp_df.rename(columns={
            "f12": "symbol",
            "f14": "name",
            "f3": "chg",
            "f2": "now_price",
            "f5": "volume",
            "f6": "amount",
            "f8": "exchange",
            "f10": "quantity_ratio",
            "f22": "up_speed",
            "f11": "up_speed_05",
            "f15": "high",
            "f16": "low",
            "f17": "open",
            "f18": "yesterday_price",
            "f20": "total_mv",
            "f21": "flow_mv",
            "f26": "list_date",
            "f33": "wei_bi",
            "f34": "outer_disk",
            "f35": "inner_disk",
            "f62": "today_main_net_inflow",
            "f66": "super_large_order_net_inflow",
            "f69": "super_large_order_net_inflow_ratio",
            "f72": "large_order_net_inflow",
            "f100": "industry",
            "f184": "today_main_net_inflow_ratio",
            "f352": "average_price",
            "f211": "buy_1_num",
            "f212": "sell_1_num"
        })

        for column in placeholder_columns:
            temp_df.loc[temp_df[column] == '-', column] = 0
        for column in numeric_columns:
            temp_df[column] = pd.to_numeric(temp_df[column], errors="coerce")

        # Large-order net inflow as a percentage of turnover.
        temp_df['large_order_net_inflow_ratio'] = round(
            (temp_df['large_order_net_inflow'] / temp_df['amount']) * 100, 2)

        # Outer-disk surplus relative to the inner disk.
        disk_gap = temp_df['outer_disk'] - temp_df['inner_disk']
        temp_df['disk_ratio'] = round(disk_gap / temp_df['inner_disk'], 2)
        # Rows without any inner disk get the fixed sentinel 1688 instead of a division result.
        temp_df.loc[temp_df["inner_disk"] == 0, ['disk_ratio']] = 1688
        temp_df['disk_diff_amount'] = round(
            (temp_df['outer_disk'] - temp_df['inner_disk']) * temp_df["average_price"], 2)
        return temp_df
    except Exception as e:
        logger.error("获取港股列表,实时行情异常:{}", e)
        return pd.DataFrame()
189
+
190
+
191
def thread_pool_executor(cookie, proxies):
    """Fetch every Hong Kong quote page with a small thread pool and merge the pages.

    The number of pages is the module-level ``max_number`` divided by
    ``page_number``, rounded up. Pages that come back empty are dropped.

    :param cookie: cookie string forwarded to ``hk_real_time_quotes_page_df``.
    :param proxies: proxies mapping (or ``None``) forwarded unchanged.
    :return: all non-empty pages concatenated in page order with a fresh
        index, or an empty DataFrame when no page returned data.
    """
    # Ceiling division: a partially filled last page still counts as a page.
    full_pages, leftover = divmod(max_number, page_number)
    page_count = full_pages + (1 if leftover else 0)

    def fetch_page(pn):
        return hk_real_time_quotes_page_df(cookie, pn, proxies)

    with ThreadPoolExecutor(max_workers=3) as pool:
        # map() yields results in submission order, i.e. page 1 first.
        page_frames = [
            frame
            for frame in pool.map(fetch_page, range(1, page_count + 1))
            if not frame.empty
        ]

    if not page_frames:
        return pd.DataFrame()
    return pd.concat(page_frames, ignore_index=True)
218
+
219
+
220
def get_hk_real_time_quotes(cookie, proxies):
    """Return Hong Kong real-time quotes with one row per ``symbol``.

    Page 1 is requested first. If it already holds more than ``min_number``
    rows it is used as the full listing; otherwise every page is fetched
    through ``thread_pool_executor``.

    :param cookie: cookie string forwarded to the page fetchers.
    :param proxies: proxies mapping (or ``None``) forwarded unchanged.
    :return: de-duplicated quotes (last occurrence of each symbol kept), or
        an empty DataFrame when no data could be fetched.
    """
    page_one_df = hk_real_time_quotes_page_df(cookie, 1, proxies)
    if page_one_df.shape[0] > min_number:
        quotes_df = page_one_df
    else:
        quotes_df = thread_pool_executor(cookie, proxies)

    # A failed fetch yields a DataFrame without any columns; calling
    # drop_duplicates('symbol') on it would raise KeyError, so hand it back as is.
    if quotes_df.empty:
        return quotes_df

    quotes_df.drop_duplicates('symbol', keep='last', inplace=True)
    return quotes_df
231
+
232
+
233
+ # 获取港股通名单 todo 被封以后替换
234
def stock_hk_ggt_components_em(cookie, pn, proxies, page_size, time_out) -> pd.DataFrame:
    """Fetch one page of Stock Connect (Hong Kong) constituents from Eastmoney.

    Source page: https://quote.eastmoney.com/center/gridlist.html#hk_components

    :param cookie: value sent as the ``Cookie`` request header.
    :param pn: page number to request.
    :param proxies: ``requests`` proxies mapping, or ``None`` to omit it.
    :param page_size: number of rows requested per page (``pz``).
    :param time_out: timeout forwarded to ``requests.get``.
    :return: the raw page with one row per entry of ``data.diff`` and an
        ``index`` column numbered from 1 within the page. On any failure the
        error is logged and ``None`` is returned, so callers must handle it.
    :rtype: pandas.DataFrame
    """
    headers = {
        'Cookie': cookie
    }
    url = "https://33.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "pn": str(pn),
        "pz": str(page_size),
        "po": "1",
        "np": "2",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "fid": "f3",
        "fs": "b:DLMK0146,b:DLMK0144",
        "fields": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f23,f24,"
                  "f25,f26,f22,f33,f11,f62,f128,f136,f115,f152",
        # Current millisecond timestamp, matching get_stock_hk_ggt_components_em_count,
        # instead of a fixed historical value.
        "_": str(int(round(time.time() * 1000, 0))),
    }
    try:
        if proxies is None:
            r = requests.get(url, params=params, timeout=time_out, headers=headers)
        else:
            r = requests.get(url, params=params, proxies=proxies, timeout=time_out, headers=headers)

        data_json = r.json()
        # "diff" is transposed so that each of its entries becomes a row.
        temp_df = pd.DataFrame(data_json["data"]["diff"]).T
        temp_df.reset_index(inplace=True)
        temp_df["index"] = temp_df.index + 1
        return temp_df
    except Exception as e:
        logger.error("获取港股通列表:{}", e)
        return None
271
+
272
+
273
def rename_hg_ggt(temp_df):
    """Label a raw 35-column Stock Connect frame and keep the twelve quote columns.

    The columns of ``temp_df`` are relabelled **in place** by position:
    meaningful positions receive their Chinese label and every other
    position is labelled ``"-"``. A new frame with the twelve labelled
    columns, renamed to English, is then returned.

    :param temp_df: DataFrame with exactly 35 columns in the endpoint's order.
    :return: new DataFrame with columns ``index, symbol, name, now_price,
        range, chg, open, high, low, yesterday_price, volume, amount``.
    """
    column_total = 35
    # Position in the raw frame -> label; positions not listed become "-".
    labelled_positions = {
        0: "序号",
        2: "最新价",
        3: "涨跌幅",
        4: "涨跌额",
        5: "成交量",
        6: "成交额",
        12: "代码",
        14: "名称",
        15: "最高",
        16: "最低",
        17: "今开",
        18: "昨收",
    }
    temp_df.columns = [labelled_positions.get(position, "-") for position in range(column_total)]

    # Insertion order here is the column order of the returned frame.
    english_names = {
        "序号": "index",
        "代码": "symbol",
        "名称": "name",
        "最新价": "now_price",
        "涨跌额": "range",
        "涨跌幅": "chg",
        "今开": "open",
        "最高": "high",
        "最低": "low",
        "昨收": "yesterday_price",
        "成交量": "volume",
        "成交额": "amount",
    }
    kept_columns = temp_df[list(english_names)]
    return kept_columns.rename(columns=english_names)
344
+
345
+
346
def get_stock_hk_ggt_components_em_count(cookie, pn, proxies, page_size, time_out):
    """Ask the Stock Connect constituent endpoint how many rows it holds.

    :param cookie: value sent as the ``Cookie`` request header.
    :param pn: page number to request.
    :param proxies: ``requests`` proxies mapping, or ``None`` to omit it.
    :param page_size: number of rows requested per page (``pz``).
    :param time_out: timeout forwarded to ``requests.get``.
    :return: ``data.total`` from the JSON response as an int, or ``0`` when
        the request or parsing fails (the error is logged).
    """
    endpoint = "https://33.push2.eastmoney.com/api/qt/clist/get"
    millis_now = str(int(round(time.time() * 1000, 0)))
    query = {
        "pn": str(pn),
        "pz": str(page_size),
        "po": "1",
        "np": "2",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "fid": "f3",
        "fs": "b:DLMK0146,b:DLMK0144",
        "fields": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f23,f24,"
                  "f25,f26,f22,f33,f11,f62,f128,f136,f115,f152",
        "_": str(millis_now),
    }
    # The proxies argument is only passed along when a mapping was supplied.
    request_options = {"timeout": time_out, "headers": {'Cookie': cookie}}
    if proxies is not None:
        request_options["proxies"] = proxies

    try:
        response = requests.get(endpoint, query, **request_options)
        return int(response.json()['data']['total'])
    except Exception as e:
        logger.error("获取港股通列表,实时行情异常:{}", e)
        return 0
376
+
377
+
378
def repeated_acquisition_ask_hk_gtt_async(em_cookie, time_out, max_number, num_threads, pages_per_thread):
    """Download all Stock Connect constituent pages in parallel, retrying failed pages.

    The pages are split into consecutive ranges of ``pages_per_thread`` pages,
    one range per worker. Each worker keeps requesting its current page,
    switching to a fresh proxy after every failure, until the page returns
    data; there is no retry limit.

    :param em_cookie: cookie string forwarded to ``stock_hk_ggt_components_em``.
    :param time_out: request timeout forwarded to ``stock_hk_ggt_components_em``.
    :param max_number: total number of rows to fetch; with the module-level
        ``page_number`` it determines the number of pages.
    :param num_threads: maximum number of page ranges / worker threads.
    :param pages_per_thread: number of pages assigned to each worker.
    :return: the combined pages passed through ``rename_hg_ggt`` (workers are
        appended in completion order), or an empty DataFrame when there is
        nothing to fetch or no worker produced data.
    """
    per_page = page_number
    total_pages = (max_number + per_page - 1) // per_page  # round up

    # One combined frame per finished worker; the lock guards the shared list.
    thread_frames = []
    frames_lock = Lock()

    def process_page_range(start_page, end_page, thread_id):
        page_frames = []
        current_page = start_page
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)

        while current_page <= end_page and current_page <= total_pages:
            proxies = {"https": proxy_ip, "http": proxy_ip}
            try:
                page_df = stock_hk_ggt_components_em(em_cookie, current_page, proxies, page_number, time_out)
                if data_frame_util.is_not_empty(page_df):
                    page_frames.append(page_df)
                    logger.info("线程{}获取页面数据成功: {}", thread_id, current_page)
                    current_page += 1
                else:
                    # No data: pause briefly and retry the same page through a new proxy.
                    time.sleep(0.2)
                    proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
                    logger.info("线程{}获取页面数据失败: {}", thread_id, current_page)
            except Exception as e:
                # Exception (not BaseException) so interpreter-exit signals are not
                # swallowed into the retry loop.
                time.sleep(1)
                proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
                logger.error("线程{}处理页面{}时发生错误: {}", thread_id, current_page, e)

        if not page_frames:
            return 0
        # Concatenate once per worker instead of once per page.
        local_df = pd.concat(page_frames)
        with frames_lock:
            thread_frames.append(local_df)
        return len(local_df)

    # Work out the page range of every worker; stop once the pages are used up.
    page_ranges = []
    for i in range(num_threads):
        start_page = i * pages_per_thread + 1
        end_page = (i + 1) * pages_per_thread
        if start_page > total_pages:
            break
        page_ranges.append((start_page, end_page, i + 1))

    if not page_ranges:
        return pd.DataFrame()

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [
            executor.submit(process_page_range, start, end, tid)
            for start, end, tid in page_ranges
        ]

        # Wait for every worker; a crashed worker is logged and the rest still count.
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                logger.error("线程执行出错: {}", e)

    if not thread_frames:
        # rename_hg_ggt requires exactly 35 columns and would fail on an empty frame.
        return pd.DataFrame()
    return rename_hg_ggt(pd.concat(thread_frames))
441
+
442
+
443
+ # 港股通
444
+
445
+
446
def get_ggt_real_time_quotes(em_cookie, time_out, pages_per_thread):
    """Fetch the full Stock Connect constituent quotes through proxy-backed workers.

    The total row count is requested up to three times, each time through a
    newly generated proxy. If the count is still 0 an empty DataFrame is
    returned; otherwise the pages are downloaded by
    ``repeated_acquisition_ask_hk_gtt_async``.

    :param em_cookie: cookie string used for every request.
    :param time_out: request timeout forwarded to the fetchers.
    :param pages_per_thread: number of pages each worker thread handles.
    :return: the renamed quotes DataFrame, or an empty DataFrame when the
        row count could not be obtained.
    """
    max_number = 0
    for _attempt in range(3):
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
        proxies = dict.fromkeys(("https", "http"), proxy_ip)
        max_number = get_stock_hk_ggt_components_em_count(em_cookie, 1, proxies, 20, time_out)
        if max_number > 0:
            break

    if max_number == 0:
        return pd.DataFrame()

    # Pages needed for max_number rows, rounded up.
    total_pages = (max_number + page_number - 1) // page_number
    num_threads = int((total_pages / pages_per_thread) + 1)
    return repeated_acquisition_ask_hk_gtt_async(
        em_cookie, time_out, max_number, num_threads, pages_per_thread)
464
+
465
+
466
if __name__ == '__main__':
    # Manual run: fetch the Stock Connect quotes with a 30 s timeout and
    # 6 pages per worker, then print the resulting frame.
    print(get_ggt_real_time_quotes(cookie_info_service.get_em_cookie(), 30, 6))