mns-common 1.3.3.5__py3-none-any.whl → 1.5.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. mns_common/api/akshare/__init__.py +0 -1
  2. mns_common/api/akshare/k_line_api.py +20 -82
  3. mns_common/api/akshare/stock_bid_ask_api.py +22 -97
  4. mns_common/api/akshare/stock_zb_pool.py +2 -0
  5. mns_common/api/akshare/stock_zt_pool_api.py +1 -1
  6. mns_common/api/em/gd/__init__.py +7 -0
  7. mns_common/api/em/{east_money_stock_gdfx_free_top_10_api.py → gd/east_money_stock_gdfx_free_top_10_api.py} +64 -9
  8. mns_common/api/em/real_time/__init__.py +7 -0
  9. mns_common/api/em/{east_money_debt_api.py → real_time/east_money_debt_api.py} +154 -69
  10. mns_common/api/em/{east_money_etf_api.py → real_time/east_money_etf_api.py} +149 -27
  11. mns_common/api/em/real_time/east_money_stock_a_api.py +301 -0
  12. mns_common/api/em/real_time/east_money_stock_a_v2_api.py +340 -0
  13. mns_common/api/em/real_time/east_money_stock_common_api.py +174 -0
  14. mns_common/api/em/real_time/east_money_stock_hk_api.py +288 -0
  15. mns_common/api/em/real_time/east_money_stock_hk_gtt_api.py +260 -0
  16. mns_common/api/em/real_time/east_money_stock_multi_thread_api_v3.py +154 -0
  17. mns_common/api/em/{east_money_stock_us_api.py → real_time/east_money_stock_us_api.py} +149 -72
  18. mns_common/api/em/real_time/real_time_quotes_repeat_api.py +195 -0
  19. mns_common/api/k_line/stock_k_line_data_api.py +11 -1
  20. mns_common/api/k_line/stock_minute_data_api.py +1 -0
  21. mns_common/api/kpl/common/kpl_common_api.py +35 -0
  22. mns_common/api/kpl/symbol/symbol_his_quotes_api.py +1 -1
  23. mns_common/api/proxies/__init__.py +7 -0
  24. mns_common/api/proxies/liu_guan_proxy_api.py +115 -0
  25. mns_common/api/ths/company/company_product_area_industry_index_query.py +46 -0
  26. mns_common/api/ths/company/ths_company_info_api.py +13 -9
  27. mns_common/api/ths/company/ths_company_info_web.py +159 -0
  28. mns_common/api/ths/concept/app/ths_concept_index_app.py +3 -1
  29. mns_common/api/ths/wen_cai/ths_wen_cai_api.py +10 -7
  30. mns_common/api/ths/zt/ths_stock_zt_pool_api.py +21 -4
  31. mns_common/api/ths/zt/ths_stock_zt_pool_v2_api.py +111 -40
  32. mns_common/api/xueqiu/__init__.py +7 -0
  33. mns_common/api/xueqiu/xue_qiu_k_line_api.py +83 -0
  34. mns_common/component/__init__.py +1 -1
  35. mns_common/component/classify/symbol_classify_api.py +7 -7
  36. mns_common/component/common_service_fun_api.py +66 -6
  37. mns_common/component/company/company_common_service_api.py +21 -1
  38. mns_common/component/company/company_common_service_new_api.py +4 -1
  39. mns_common/component/cookie/cookie_enum.py +15 -0
  40. mns_common/component/cookie/cookie_info_service.py +9 -4
  41. mns_common/component/data/data_init_api.py +13 -8
  42. mns_common/component/deal/deal_service_api.py +70 -8
  43. mns_common/component/deal/deal_service_v2_api.py +167 -0
  44. mns_common/component/em/__init__.py +7 -0
  45. mns_common/component/em/em_real_time_quotes_api.py +56 -0
  46. mns_common/component/em/em_stock_info_api.py +48 -0
  47. mns_common/component/exception/ExceptionMonitor.py +86 -0
  48. mns_common/component/exception/__init__.py +7 -0
  49. mns_common/component/k_line/common/k_line_common_service_api.py +4 -0
  50. mns_common/component/main_line/__init__.py +7 -0
  51. mns_common/component/main_line/main_line_zt_reason_service.py +237 -0
  52. mns_common/component/proxies/__init__.py +7 -0
  53. mns_common/component/proxies/proxy_common_api.py +252 -0
  54. mns_common/component/self_choose/__init__.py +13 -0
  55. mns_common/component/tfp/stock_tfp_api.py +82 -12
  56. mns_common/component/us/__init__.py +7 -0
  57. mns_common/component/us/us_stock_etf_info_api.py +125 -0
  58. mns_common/constant/__init__.py +1 -0
  59. mns_common/constant/db_name_constant.py +65 -34
  60. mns_common/constant/extra_income_db_name.py +154 -0
  61. mns_common/constant/strategy_classify.py +72 -0
  62. mns_common/db/MongodbUtil.py +2 -1
  63. mns_common/db/MongodbUtilLocal.py +1 -0
  64. mns_common/db/v2/MongodbUtilV2.py +0 -4
  65. mns_common-1.5.7.2.dist-info/METADATA +4 -0
  66. {mns_common-1.3.3.5.dist-info → mns_common-1.5.7.2.dist-info}/RECORD +70 -45
  67. {mns_common-1.3.3.5.dist-info → mns_common-1.5.7.2.dist-info}/WHEEL +1 -1
  68. mns_common/api/em/east_money_stock_api.py +0 -222
  69. mns_common/api/em/east_money_stock_hk_api.py +0 -318
  70. mns_common/api/em/east_money_stock_v2_api.py +0 -219
  71. mns_common/api/ths/concept/web/ths_company_info_web.py +0 -163
  72. mns_common/component/qmt/qmt_buy_service.py +0 -172
  73. mns_common-1.3.3.5.dist-info/METADATA +0 -4
  74. /mns_common/{component/qmt → api/em/concept}/__init__.py +0 -0
  75. /mns_common/api/em/{em_concept_index_api.py → concept/em_concept_index_api.py} +0 -0
  76. {mns_common-1.3.3.5.dist-info → mns_common-1.5.7.2.dist-info}/top_level.txt +0 -0
@@ -1,42 +1,76 @@
1
1
  import sys
2
2
  import os
3
3
 
4
- import sys
5
- import os
6
-
7
4
  file_path = os.path.abspath(__file__)
8
5
  end = file_path.index('mns') + 14
9
6
  project_path = file_path[0:end]
10
7
  sys.path.append(project_path)
11
- from concurrent.futures import ThreadPoolExecutor
8
+
9
+ import json
10
+ import akshare as ak
12
11
  import pandas as pd
13
12
  from loguru import logger
14
- import json
15
13
  import requests
16
14
  import time
17
- import akshare as ak
18
15
  import numpy as np
16
+ import mns_common.component.proxies.proxy_common_api as proxy_common_api
17
+ from concurrent.futures import ThreadPoolExecutor
18
+ from threading import Lock
19
+ import concurrent.futures
20
+ import mns_common.utils.data_frame_util as data_frame_util
19
21
 
20
- # 最大返回条数
21
- max_number = 600
22
- # 最小返回条数
23
- min_number = 500
24
22
  # 分页条数
25
23
  page_number = 100
26
24
 
25
+ fields = ("f352,f2,f3,f5,f6,f8,f10,f11,f22,f12,f14,f15,f16,f17,f18,f20,f21,f26,f33,f34,f35,f62,f66,f69,f72,f184,"
26
+ "f211,f212,f232,f233,f234")
27
+
28
+
29
+ def get_kzz_count(pn, proxies, page_size, time_out):
30
+ current_timestamp = str(int(round(time.time() * 1000, 0)))
31
+ url = "https://push2.eastmoney.com/api/qt/clist/get"
32
+
33
+ params = {
34
+ "cb": "jQuery34103608466964799838_" + current_timestamp,
35
+ "pn": str(pn),
36
+ "np": 3,
37
+ "ut": "8a086bfc3570bdde64a6a1c585cccb35",
38
+ "fltt": 1,
39
+ "invt": 1,
40
+ "fs": "m:0+e:11,m:1+e:11,m:1+e:11+s:4194304,m:0+e:11+s:8388608",
41
+ "dpt": "zqsc.zpg",
42
+ "fields": fields,
43
+ "wbp2u": "|0|0|0|wap",
44
+ "fid": "f12",
45
+ "po": 1,
46
+ "pz": str(page_size),
47
+ "_": current_timestamp
48
+ }
49
+ try:
50
+ if proxies is None:
51
+ r = requests.get(url, params, timeout=time_out)
52
+ else:
53
+ r = requests.get(url, params, proxies=proxies, timeout=time_out)
54
+ data_text = r.text
55
+
56
+ begin_index_total = data_text.index('"total":')
57
+
58
+ end_index_total = data_text.index('"diff"')
59
+ max_number = int(data_text[begin_index_total + 8:end_index_total - 1])
60
+ return max_number
61
+
62
+
63
+ except Exception as e:
64
+ logger.error("获取可转债列表,实时行情异常:{}", e)
65
+ return 0
27
66
 
28
- # fields_02 = "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64,f65,f66,f67,f68,f69,f70,f71,f72,f73,f74,f75,f76,f77,f78,f79,f80,f81,f82,f83,f84,f85,f86,f87,f88,f89,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99,f100,f101,f102,f103,f104,f105,f106,f107,f108" \
29
- # ",f109,f110,f111,f112,f113,f114,f115,f116,f117,f118,f119,f120,f121,f122,f123,f124,f125,f126,f127,f128,f129,f130,f131,f132,f133,f134,f135,f136,f137,f138,f139,f140,f141,f142,f143,f144,f145,f146,f147,f148,f149,f150,f151,f152,f153,f154,f155,f156,f157,f158,f159,f160,f161,f162,f163,f164,f165,f166,f167,f168,f169,f170,f171,f172,f173,f174,f175,f176,f177,f178,f179,f180,f181,f182,f183,f184,f185,f186,f187,f188,f189,f190,f191,f192,f193,f194,f195,f196,f197,f198,f199,f200" \
30
- # ",f209,f210,f211,f212,f213,f214,f215,f216,f217,f218,f219,f220,f221,f222,f223,f224,f225,f226,f227,f228,f229,f230,f231,f232,f233,f234,f235,f236,f237,f238,f239,f240,f241,f242,f243,f244,f245,f246,f247,f248,f249,f250,f251,f252,f253,f254,f255,f256,f257,f258,f259,f260,f261,f262,f263,f264,f265,f266,f267,f268,f269,f270,f271,f272,f273,f274,f275,f276,f277,f278,f279,f280,f281,f282,f283,f284,f285,f286,f287,f288,f289,f290,f291,f292,f293,f294,f295,f296,f297,f298,f299,f300" \
31
- # ",f309,f310,f312,f313,f314,f315,f316,f317,f318,f319,f320,f321,f322,f323,f324,f325,f326,f327,f328,f329,f330,f331,f332,f333,f334,f335,f336,f337,f338,f339,f340,f341,f342,f343,f344,f345,f346,f347,f348,f349,f350,f351,f352,f353,f354,f355,f356,f357,f358,f359,f360,f361,f362,f363,f364,f365,f366,f367,f368,f369,f370,f371,f372,f373,f374,f375,f376,f377,f378,f379,f380,f381,f382,f383,f384,f385,f386,f387,f388,f389,f390,f391,f392,f393,f394,f395,f396,f397,f398,f399,f401"
32
- #
33
67
 
34
68
  #
35
69
  # url = https://push2.eastmoney.com/api/qt/clist/get?cb=jQuery34103608466964799838_1718163189869&pn=1&np=1&ut
36
70
  # =8a086bfc3570bdde64a6a1c585cccb35&fltt=1&invt=1&fs=m:0+e:11,m:1+e:11,m:1+e:11+s:4194304,
37
71
  # m:0+e:11+s:8388608&dpt=zqsc.zpg&fields=f1,f2,f3,f4,f5,f6,f8,f10,f12,f13,f14,f18,f22,f152,
38
72
  # f237&wbp2u=|0|0|0|wap&fid=f3&po=1&pz=2000&_=1718163189870
39
- def get_debt_page_data(fields, pn) -> pd.DataFrame:
73
+ def get_debt_page_data(pn, proxies, page_size, time_out) -> pd.DataFrame:
40
74
  current_timestamp = str(int(round(time.time() * 1000, 0)))
41
75
  url = "https://push2.eastmoney.com/api/qt/clist/get"
42
76
 
@@ -53,12 +87,27 @@ def get_debt_page_data(fields, pn) -> pd.DataFrame:
53
87
  "wbp2u": "|0|0|0|wap",
54
88
  "fid": "f12",
55
89
  "po": 1,
56
- "pz": 2000,
90
+ "pz": str(page_size),
57
91
  "_": current_timestamp
58
92
  }
59
93
  try:
60
- r = requests.get(url, params)
94
+ if proxies is None:
95
+ r = requests.get(url, params, timeout=time_out)
96
+ else:
97
+ r = requests.get(url, params, proxies=proxies, timeout=time_out)
61
98
  data_text = r.text
99
+
100
+ if pn == 1:
101
+ try:
102
+ begin_index_total = data_text.index('"total":')
103
+
104
+ end_index_total = data_text.index('"diff"')
105
+ global max_number
106
+ max_number = int(data_text[begin_index_total + 8:end_index_total - 1])
107
+ except Exception as e:
108
+ logger.error(f"获取第{pn}页可转债列表异常: {e}")
109
+ return pd.DataFrame()
110
+
62
111
  begin_index = data_text.index('[')
63
112
  end_index = data_text.index(']')
64
113
  data_json = data_text[begin_index:end_index + 1]
@@ -72,52 +121,6 @@ def get_debt_page_data(fields, pn) -> pd.DataFrame:
72
121
  return pd.DataFrame()
73
122
 
74
123
 
75
- def all_debt_ticker_data(fields) -> pd.DataFrame:
76
- """
77
- 使用多线程获取所有债券数据
78
- """
79
- # 计算总页数,假设总共有1000条数据,每页200条
80
-
81
- per_page = page_number
82
- total_pages = (max_number + per_page - 1) // per_page # 向上取整
83
-
84
- # 创建线程池
85
- with ThreadPoolExecutor(max_workers=3) as executor:
86
- # 提交任务,获取每页数据
87
- futures = [executor.submit(get_debt_page_data, fields, pn)
88
- for pn in range(1, total_pages + 1)]
89
-
90
- # 收集结果
91
- results = []
92
- for future in futures:
93
- result = future.result()
94
- if not result.empty:
95
- results.append(result)
96
-
97
- # 合并所有页面的数据
98
- if results:
99
- return pd.concat(results, ignore_index=True)
100
- else:
101
- return pd.DataFrame()
102
-
103
-
104
- def get_debt_real_time_quotes():
105
- fields = ("f352,f2,f3,f5,f6,f8,f10,f11,f22,f12,f14,f15,f16,f17,f18,f20,f21,f26,f33,f34,f35,f62,f66,f69,f72,f184,"
106
- "f211,f212,f232,f233,f234")
107
- # 获取第一页数据
108
- page_one_df = get_debt_page_data(fields, 1)
109
- # 数据接口正常返回5600以上的数量
110
- if page_one_df.shape[0] > min_number:
111
- page_one_df = rename_real_time_quotes_df(page_one_df)
112
- page_one_df.drop_duplicates('symbol', keep='last', inplace=True)
113
- return page_one_df
114
- else:
115
- page_df = all_debt_ticker_data(fields)
116
- page_df = rename_real_time_quotes_df(page_df)
117
- page_df.drop_duplicates('symbol', keep='last', inplace=True)
118
- return page_df
119
-
120
-
121
124
  def rename_real_time_quotes_df(temp_df):
122
125
  temp_df = temp_df.rename(columns={
123
126
  "f2": "now_price",
@@ -283,9 +286,91 @@ def get_kzz_bond_info():
283
286
  logger.error("获取可转债信息异常:{}", e)
284
287
 
285
288
 
289
+ def repeated_acquisition_ask_etf_async(time_out, max_number, num_threads, pages_per_thread):
290
+ per_page = page_number
291
+ total_pages = (max_number + per_page - 1) // per_page # 向上取整
292
+ result_df = pd.DataFrame()
293
+
294
+ # 创建线程锁以确保线程安全
295
+ df_lock = Lock()
296
+
297
+ # 计算每个线程处理的页数范围
298
+ def process_page_range(start_page, end_page, thread_id):
299
+ nonlocal result_df
300
+ local_df = pd.DataFrame()
301
+ current_page = start_page
302
+ proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
303
+
304
+ while current_page <= end_page and current_page <= total_pages:
305
+ proxies = {"https": proxy_ip, "http": proxy_ip}
306
+ try:
307
+ page_df = get_debt_page_data(current_page, proxies, page_number, time_out)
308
+ if data_frame_util.is_not_empty(page_df):
309
+ local_df = pd.concat([local_df, page_df])
310
+ logger.info("线程{}获取页面数据成功: {}", thread_id, current_page)
311
+ current_page += 1
312
+ else:
313
+ time.sleep(0.2)
314
+ proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
315
+ logger.info("线程{}获取页面数据失败: {}", thread_id, current_page)
316
+ except BaseException as e:
317
+ time.sleep(1)
318
+ proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
319
+ logger.error("线程{}处理页面{}时发生错误: {}", thread_id, current_page, e)
320
+
321
+ with df_lock:
322
+ result_df = pd.concat([result_df, local_df])
323
+ return len(local_df)
324
+
325
+ # 计算每个线程的页面范围
326
+ page_ranges = []
327
+ for i in range(num_threads):
328
+ start_page = i * pages_per_thread + 1
329
+ end_page = (i + 1) * pages_per_thread
330
+ if start_page > total_pages:
331
+ break
332
+ page_ranges.append((start_page, end_page, i + 1))
333
+
334
+ # 使用线程池执行任务
335
+ with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
336
+ # 提交所有任务
337
+ futures = [
338
+ executor.submit(process_page_range, start, end, tid)
339
+ for start, end, tid in page_ranges
340
+ ]
341
+
342
+ # 等待所有任务完成并获取结果
343
+ results = []
344
+ for future in concurrent.futures.as_completed(futures):
345
+ try:
346
+ result = future.result()
347
+ results.append(result)
348
+ except Exception as e:
349
+ logger.error("线程执行出错: {}", e)
350
+
351
+ return rename_real_time_quotes_df(result_df)
352
+
353
+
354
+ def get_kzz_real_time_quotes(time_out, pages_per_thread):
355
+ try_numer = 3
356
+ while try_numer > 0:
357
+ proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
358
+ proxies = {"https": proxy_ip,
359
+ "http": proxy_ip}
360
+
361
+ max_number = get_kzz_count(1, proxies, 20, time_out)
362
+ if max_number > 0:
363
+ break
364
+ try_numer = try_numer - 1
365
+ if max_number == 0:
366
+ return pd.DataFrame()
367
+
368
+ total_pages = (max_number + page_number - 1) // page_number # 向上取整
369
+
370
+ num_threads = int((total_pages / pages_per_thread) + 1)
371
+ return repeated_acquisition_ask_etf_async(time_out, max_number, num_threads, pages_per_thread)
372
+
373
+
286
374
  if __name__ == '__main__':
287
- info_df = get_kzz_bond_info()
288
- print(info_df)
289
- while True:
290
- df = get_debt_real_time_quotes()
291
- logger.info(df)
375
+ test_df = get_kzz_real_time_quotes(30, 6)
376
+ print(test_df)
@@ -6,22 +6,22 @@ end = file_path.index('mns') + 16
6
6
  project_path = file_path[0:end]
7
7
  sys.path.append(project_path)
8
8
 
9
- from concurrent.futures import ThreadPoolExecutor
10
9
  import pandas as pd
11
10
  from loguru import logger
12
11
  import requests
13
12
  import time
14
13
  import numpy as np
14
+ import mns_common.component.proxies.proxy_common_api as proxy_common_api
15
+ import concurrent.futures
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ from threading import Lock
18
+ import mns_common.utils.data_frame_util as data_frame_util
15
19
 
16
- # 最大返回条数
17
- max_number = 1200
18
- # 最小返回条数
19
- min_number = 1000
20
20
  # 分页条数
21
21
  page_number = 100
22
22
 
23
23
 
24
- def get_fund_etf_page_df(pn) -> pd.DataFrame:
24
+ def get_etf_count(pn, proxies, page_size, time_out):
25
25
  """
26
26
  东方财富-ETF 实时行情
27
27
  https://quote.eastmoney.com/center/gridlist.html#fund_etf
@@ -32,7 +32,7 @@ def get_fund_etf_page_df(pn) -> pd.DataFrame:
32
32
  url = "https://88.push2.eastmoney.com/api/qt/clist/get"
33
33
  params = {
34
34
  "pn": str(pn),
35
- "pz": "5000",
35
+ "pz": str(page_size),
36
36
  "po": "1",
37
37
  "np": "3",
38
38
  "ut": "bd1d9ddb04089700cf9c27f6f7426281",
@@ -52,8 +52,62 @@ def get_fund_etf_page_df(pn) -> pd.DataFrame:
52
52
  "_": str(current_timestamp),
53
53
  }
54
54
  try:
55
- r = requests.get(url, timeout=15, params=params)
55
+ if proxies is None:
56
+ r = requests.get(url, params, timeout=time_out)
57
+ else:
58
+ r = requests.get(url, params, proxies=proxies, timeout=time_out)
56
59
  data_json = r.json()
60
+ total_number = int(data_json['data']['total'])
61
+ return total_number
62
+ except Exception as e:
63
+ logger.error("获取ETF列表,实时行情异常:{}", e)
64
+ return 0
65
+
66
+
67
+ def get_fund_etf_page_df(pn, proxies, page_size, time_out) -> pd.DataFrame:
68
+ """
69
+ 东方财富-ETF 实时行情
70
+ https://quote.eastmoney.com/center/gridlist.html#fund_etf
71
+ :return: ETF 实时行情
72
+ :rtype: pandas.DataFrame
73
+ """
74
+ current_timestamp = str(int(round(time.time() * 1000, 0)))
75
+ url = "https://88.push2.eastmoney.com/api/qt/clist/get"
76
+ params = {
77
+ "pn": str(pn),
78
+ "pz": str(page_size),
79
+ "po": "1",
80
+ "np": "3",
81
+ "ut": "bd1d9ddb04089700cf9c27f6f7426281",
82
+ "fltt": "2",
83
+ "invt": "2",
84
+ "wbp2u": "|0|0|0|web",
85
+ "fid": "f12",
86
+ "fs": "b:MK0021,b:MK0022,b:MK0023,b:MK0024",
87
+ "fields": (
88
+ "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,"
89
+ "f12,f13,f14,f15,f16,f17,f18,f20,f21,"
90
+ "f23,f24,f25,f26,f22,f11,f30,f31,f32,f33,"
91
+ "f34,f35,f38,f62,f63,f64,f65,f66,f69,"
92
+ "f72,f75,f78,f81,f84,f87,f115,f124,f128,"
93
+ "f136,f152,f184,f297,f402,f441"
94
+ ),
95
+ "_": str(current_timestamp),
96
+ }
97
+ try:
98
+ if proxies is None:
99
+ r = requests.get(url, params, timeout=time_out)
100
+ else:
101
+ r = requests.get(url, params, proxies=proxies, timeout=time_out)
102
+ data_json = r.json()
103
+ if pn == 1:
104
+ try:
105
+ global max_number
106
+ max_number = int(data_json['data']['total'])
107
+ except Exception as e:
108
+ logger.error("获取第{}页ETF列表异常:{}", page_size, str(e))
109
+ return pd.DataFrame()
110
+
57
111
  temp_df = pd.DataFrame(data_json["data"]["diff"])
58
112
  temp_df.rename(
59
113
  columns={
@@ -210,7 +264,7 @@ def get_fund_etf_page_df(pn) -> pd.DataFrame:
210
264
  return pd.DataFrame()
211
265
 
212
266
 
213
- def thread_pool_executor():
267
+ def thread_pool_executor(proxies):
214
268
  """
215
269
  使用多线程获取所有ETF数据
216
270
  """
@@ -222,7 +276,7 @@ def thread_pool_executor():
222
276
  # 创建线程池
223
277
  with ThreadPoolExecutor(max_workers=3) as executor:
224
278
  # 提交任务,获取每页数据
225
- futures = [executor.submit(get_fund_etf_page_df, pn)
279
+ futures = [executor.submit(get_fund_etf_page_df, pn, proxies)
226
280
  for pn in range(1, total_pages + 1)]
227
281
 
228
282
  # 收集结果
@@ -341,23 +395,91 @@ def rename_etf(fund_etf_spot_em_df):
341
395
  return fund_etf_spot_em_df
342
396
 
343
397
 
344
- def get_etf_real_time_quotes():
345
- # 获取第一页数据
346
- page_one_df = get_fund_etf_page_df(1)
347
- # 数据接口正常返回5600以上的数量
348
- if page_one_df.shape[0] > min_number:
349
- page_one_df = rename_etf(page_one_df)
350
- page_one_df.drop_duplicates('symbol', keep='last', inplace=True)
351
- return page_one_df
352
- else:
353
- page_df = thread_pool_executor()
354
- page_df = rename_etf(page_df)
355
- page_df.drop_duplicates('symbol', keep='last', inplace=True)
356
- return page_df
398
+ def repeated_acquisition_ask_etf_async(time_out, max_number, num_threads, pages_per_thread):
399
+ per_page = page_number
400
+ total_pages = (max_number + per_page - 1) // per_page # 向上取整
401
+ result_df = pd.DataFrame()
402
+
403
+ # 创建线程锁以确保线程安全
404
+ df_lock = Lock()
405
+
406
+ # 计算每个线程处理的页数范围
407
+ def process_page_range(start_page, end_page, thread_id):
408
+ nonlocal result_df
409
+ local_df = pd.DataFrame()
410
+ current_page = start_page
411
+ proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
412
+
413
+ while current_page <= end_page and current_page <= total_pages:
414
+ proxies = {"https": proxy_ip, "http": proxy_ip}
415
+ try:
416
+ page_df = get_fund_etf_page_df(current_page, proxies, page_number, time_out)
417
+ if data_frame_util.is_not_empty(page_df):
418
+ local_df = pd.concat([local_df, page_df])
419
+ logger.info("线程{}获取页面数据成功: {}", thread_id, current_page)
420
+ current_page += 1
421
+ else:
422
+ time.sleep(0.2)
423
+ proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
424
+ logger.info("线程{}获取页面数据失败: {}", thread_id, current_page)
425
+ except BaseException as e:
426
+ time.sleep(1)
427
+ proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
428
+ logger.error("线程{}处理页面{}时发生错误: {}", thread_id, current_page, e)
429
+
430
+ with df_lock:
431
+ result_df = pd.concat([result_df, local_df])
432
+ return len(local_df)
433
+
434
+ # 计算每个线程的页面范围
435
+ page_ranges = []
436
+ for i in range(num_threads):
437
+ start_page = i * pages_per_thread + 1
438
+ end_page = (i + 1) * pages_per_thread
439
+ if start_page > total_pages:
440
+ break
441
+ page_ranges.append((start_page, end_page, i + 1))
442
+
443
+ # 使用线程池执行任务
444
+ with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
445
+ # 提交所有任务
446
+ futures = [
447
+ executor.submit(process_page_range, start, end, tid)
448
+ for start, end, tid in page_ranges
449
+ ]
450
+
451
+ # 等待所有任务完成并获取结果
452
+ results = []
453
+ for future in concurrent.futures.as_completed(futures):
454
+ try:
455
+ result = future.result()
456
+ results.append(result)
457
+ except Exception as e:
458
+ logger.error("线程执行出错: {}", e)
459
+
460
+ return rename_etf(result_df)
461
+
462
+
463
+ def get_etf_real_time_quotes(time_out, pages_per_thread):
464
+ try_numer = 3
465
+ while try_numer > 0:
466
+ proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
467
+ proxies = {"https": proxy_ip,
468
+ "http": proxy_ip}
469
+
470
+ max_number = get_etf_count(1, proxies, 20, time_out)
471
+ if max_number > 0:
472
+ break
473
+ try_numer = try_numer - 1
474
+ if max_number == 0:
475
+ return pd.DataFrame()
476
+
477
+ total_pages = (max_number + page_number - 1) // page_number # 向上取整
478
+
479
+ num_threads = int((total_pages / pages_per_thread) + 1)
480
+ return repeated_acquisition_ask_etf_async(time_out, max_number, num_threads, pages_per_thread)
357
481
 
358
482
 
359
483
  if __name__ == '__main__':
360
- fund_etf_df = get_etf_real_time_quotes()
361
- fund_etf_df = fund_etf_df.sort_values(by=['amount'], ascending=False)
362
- fund_etf_df = fund_etf_df.fillna(0)
363
- print(fund_etf_df)
484
+ test_df = get_etf_real_time_quotes(30, 6)
485
+ print(test_df)