mns-common 1.3.9.2__py3-none-any.whl → 1.6.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mns-common might be problematic. Click here for more details.

Files changed (67)
  1. mns_common/__init__.py +1 -0
  2. mns_common/api/akshare/__init__.py +0 -1
  3. mns_common/api/akshare/k_line_api.py +20 -82
  4. mns_common/api/akshare/stock_bid_ask_api.py +21 -14
  5. mns_common/api/akshare/stock_zb_pool.py +2 -0
  6. mns_common/api/akshare/stock_zt_pool_api.py +1 -1
  7. mns_common/api/em/gd/east_money_stock_gdfx_free_top_10_api.py +62 -7
  8. mns_common/api/em/real_time/__init__.py +1 -1
  9. mns_common/api/em/real_time/east_money_debt_api.py +168 -71
  10. mns_common/api/em/real_time/east_money_etf_api.py +165 -27
  11. mns_common/api/em/real_time/east_money_stock_a_api.py +37 -38
  12. mns_common/api/em/real_time/east_money_stock_a_v2_api.py +97 -53
  13. mns_common/api/em/real_time/east_money_stock_common_api.py +174 -0
  14. mns_common/api/em/real_time/east_money_stock_hk_api.py +252 -271
  15. mns_common/api/em/real_time/east_money_stock_hk_gtt_api.py +291 -0
  16. mns_common/api/em/real_time/east_money_stock_multi_thread_api_v3.py +154 -0
  17. mns_common/api/em/real_time/east_money_stock_us_api.py +210 -82
  18. mns_common/api/em/real_time/real_time_quotes_repeat_api.py +195 -0
  19. mns_common/api/foreign_exchange/foreign_exchange_api.py +38 -0
  20. mns_common/api/k_line/stock_k_line_data_api.py +11 -1
  21. mns_common/api/kpl/common/__init__.py +3 -2
  22. mns_common/api/kpl/common/kpl_common_api.py +35 -0
  23. mns_common/api/kpl/symbol/symbol_his_quotes_api.py +1 -1
  24. mns_common/api/kpl/theme/kpl_theme_api.py +69 -0
  25. mns_common/api/kpl/yidong/__init__.py +7 -0
  26. mns_common/api/kpl/yidong/stock_bid_yi_dong_api.py +52 -0
  27. mns_common/api/proxies/liu_guan_proxy_api.py +55 -5
  28. mns_common/api/ths/company/company_product_area_industry_index_query.py +46 -0
  29. mns_common/api/ths/company/ths_company_info_api.py +2 -1
  30. mns_common/api/ths/company/ths_company_info_web.py +159 -0
  31. mns_common/api/ths/concept/app/ths_concept_index_app.py +3 -1
  32. mns_common/api/ths/wen_cai/ths_wen_cai_api.py +1 -1
  33. mns_common/api/ths/zt/ths_stock_zt_pool_api.py +20 -1
  34. mns_common/api/ths/zt/ths_stock_zt_pool_v2_api.py +105 -29
  35. mns_common/api/ths/zt/ths_stock_zt_reason_web_api.py +100 -0
  36. mns_common/api/us/ths_us_company_info_api.py +131 -0
  37. mns_common/api/xueqiu/xue_qiu_k_line_api.py +31 -23
  38. mns_common/component/common_service_fun_api.py +28 -8
  39. mns_common/component/company/company_common_service_new_api.py +2 -0
  40. mns_common/component/cookie/cookie_enum.py +16 -0
  41. mns_common/component/cookie/cookie_info_service.py +18 -8
  42. mns_common/component/data/data_init_api.py +13 -8
  43. mns_common/component/deal/deal_service_api.py +70 -8
  44. mns_common/component/deal/deal_service_v2_api.py +167 -0
  45. mns_common/component/em/em_stock_info_api.py +12 -3
  46. mns_common/component/exception/ExceptionMonitor.py +86 -0
  47. mns_common/component/exception/__init__.py +7 -0
  48. mns_common/component/main_line/__init__.py +7 -0
  49. mns_common/component/main_line/main_line_zt_reason_service.py +257 -0
  50. mns_common/component/proxies/proxy_common_api.py +199 -31
  51. mns_common/component/tfp/stock_tfp_api.py +82 -12
  52. mns_common/component/us/__init__.py +7 -0
  53. mns_common/component/us/us_stock_etf_info_api.py +130 -0
  54. mns_common/constant/db_name_constant.py +75 -26
  55. mns_common/constant/extra_income_db_name.py +97 -11
  56. mns_common/constant/strategy_classify.py +72 -0
  57. mns_common/db/MongodbUtil.py +3 -0
  58. mns_common/db/MongodbUtilLocal.py +3 -0
  59. {mns_common-1.3.9.2.dist-info → mns_common-1.6.1.4.dist-info}/METADATA +1 -1
  60. {mns_common-1.3.9.2.dist-info → mns_common-1.6.1.4.dist-info}/RECORD +64 -47
  61. mns_common/api/ths/concept/web/ths_company_info_web.py +0 -163
  62. mns_common/component/qmt/qmt_buy_service.py +0 -172
  63. mns_common/component/task/real_time_data_sync_check.py +0 -97
  64. /mns_common/{component/qmt → api/foreign_exchange}/__init__.py +0 -0
  65. /mns_common/{component/task → api/kpl/theme}/__init__.py +0 -0
  66. {mns_common-1.3.9.2.dist-info → mns_common-1.6.1.4.dist-info}/WHEEL +0 -0
  67. {mns_common-1.3.9.2.dist-info → mns_common-1.6.1.4.dist-info}/top_level.txt +0 -0
@@ -6,22 +6,22 @@ end = file_path.index('mns') + 16
6
6
  project_path = file_path[0:end]
7
7
  sys.path.append(project_path)
8
8
 
9
- from concurrent.futures import ThreadPoolExecutor
10
9
  import pandas as pd
11
10
  from loguru import logger
12
11
  import requests
13
12
  import time
14
13
  import numpy as np
14
+ import mns_common.component.proxies.proxy_common_api as proxy_common_api
15
+ import concurrent.futures
16
+ from concurrent.futures import ThreadPoolExecutor
17
+ from threading import Lock
18
+ import mns_common.utils.data_frame_util as data_frame_util
15
19
 
16
- # 最大返回条数
17
- max_number = 1200
18
- # 最小返回条数
19
- min_number = 1000
20
20
  # 分页条数
21
21
  page_number = 100
22
22
 
23
23
 
24
- def get_fund_etf_page_df(pn, proxies) -> pd.DataFrame:
24
+ def get_etf_count(pn, proxies, page_size, time_out):
25
25
  """
26
26
  东方财富-ETF 实时行情
27
27
  https://quote.eastmoney.com/center/gridlist.html#fund_etf
@@ -32,7 +32,7 @@ def get_fund_etf_page_df(pn, proxies) -> pd.DataFrame:
32
32
  url = "https://88.push2.eastmoney.com/api/qt/clist/get"
33
33
  params = {
34
34
  "pn": str(pn),
35
- "pz": "5000",
35
+ "pz": str(page_size),
36
36
  "po": "1",
37
37
  "np": "3",
38
38
  "ut": "bd1d9ddb04089700cf9c27f6f7426281",
@@ -53,16 +53,59 @@ def get_fund_etf_page_df(pn, proxies) -> pd.DataFrame:
53
53
  }
54
54
  try:
55
55
  if proxies is None:
56
- r = requests.get(url, params)
56
+ r = requests.get(url, params, timeout=time_out)
57
57
  else:
58
- r = requests.get(url, params, proxies=proxies)
58
+ r = requests.get(url, params, proxies=proxies, timeout=time_out)
59
+ data_json = r.json()
60
+ total_number = int(data_json['data']['total'])
61
+ return total_number
62
+ except Exception as e:
63
+ logger.error("获取ETF列表,实时行情异常:{}", e)
64
+ return 0
65
+
66
+
67
+ def get_fund_etf_page_df(pn, proxies, page_size, time_out) -> pd.DataFrame:
68
+ """
69
+ 东方财富-ETF 实时行情
70
+ https://quote.eastmoney.com/center/gridlist.html#fund_etf
71
+ :return: ETF 实时行情
72
+ :rtype: pandas.DataFrame
73
+ """
74
+ current_timestamp = str(int(round(time.time() * 1000, 0)))
75
+ url = "https://88.push2.eastmoney.com/api/qt/clist/get"
76
+ params = {
77
+ "pn": str(pn),
78
+ "pz": str(page_size),
79
+ "po": "1",
80
+ "np": "3",
81
+ "ut": "bd1d9ddb04089700cf9c27f6f7426281",
82
+ "fltt": "2",
83
+ "invt": "2",
84
+ "wbp2u": "|0|0|0|web",
85
+ "fid": "f12",
86
+ "fs": "b:MK0021,b:MK0022,b:MK0023,b:MK0024",
87
+ "fields": (
88
+ "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,"
89
+ "f12,f13,f14,f15,f16,f17,f18,f20,f21,"
90
+ "f23,f24,f25,f26,f22,f11,f30,f31,f32,f33,"
91
+ "f34,f35,f38,f62,f63,f64,f65,f66,f69,"
92
+ "f72,f75,f78,f81,f84,f87,f115,f124,f128,"
93
+ "f136,f152,f184,f297,f402,f441"
94
+ ),
95
+ "_": str(current_timestamp),
96
+ }
97
+ try:
98
+ if proxies is None:
99
+ r = requests.get(url, params, timeout=time_out)
100
+ else:
101
+ r = requests.get(url, params, proxies=proxies, timeout=time_out)
59
102
  data_json = r.json()
60
103
  if pn == 1:
61
104
  try:
62
105
  global max_number
63
106
  max_number = int(data_json['data']['total'])
64
107
  except Exception as e:
65
- logger.error(f"获取第{pn}页ETF列表异常: {e}")
108
+ logger.error("获取第{}页ETF列表异常:{}", page_size, str(e))
66
109
  return pd.DataFrame()
67
110
 
68
111
  temp_df = pd.DataFrame(data_json["data"]["diff"])
@@ -352,23 +395,118 @@ def rename_etf(fund_etf_spot_em_df):
352
395
  return fund_etf_spot_em_df
353
396
 
354
397
 
355
- def get_etf_real_time_quotes(proxies):
356
- # 获取第一页数据
357
- page_one_df = get_fund_etf_page_df(1, proxies)
358
- # 数据接口正常返回5600以上的数量
359
- if page_one_df.shape[0] > min_number:
360
- page_one_df = rename_etf(page_one_df)
361
- page_one_df.drop_duplicates('symbol', keep='last', inplace=True)
362
- return page_one_df
363
- else:
364
- page_df = thread_pool_executor(proxies)
365
- page_df = rename_etf(page_df)
366
- page_df.drop_duplicates('symbol', keep='last', inplace=True)
367
- return page_df
398
def repeated_acquisition_ask_etf_async(time_out, max_number, num_threads, pages_per_thread,
                                       max_retries_per_page=20):
    """Fetch all ETF quote pages with a thread pool and return the renamed frame.

    The page space ``1..ceil(max_number / page_number)`` is split into
    consecutive ranges of ``pages_per_thread`` pages, one range per worker.
    Each worker requests its own proxy from ``proxy_common_api`` and asks for
    a new one after every failed attempt.

    :param time_out: timeout forwarded to ``get_fund_etf_page_df``
    :param max_number: expected total number of ETF rows
    :param num_threads: upper bound on the number of workers / page ranges
    :param pages_per_thread: number of consecutive pages handled by one worker
    :param max_retries_per_page: consecutive failed attempts tolerated for one
        page before the worker skips it; ``None`` retries without limit
    :return: result of ``rename_etf`` applied to the concatenated page frames
    """
    total_pages = (max_number + page_number - 1) // page_number  # ceiling division
    collected_frames = []
    # Guards collected_frames, which every worker appends to.
    frames_lock = Lock()

    def process_page_range(start_page, end_page, thread_id):
        """Download pages start_page..end_page (clamped to total_pages)."""
        local_frames = []
        current_page = start_page
        last_page = min(end_page, total_pages)
        failures = 0
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)

        while current_page <= last_page:
            proxies = {"https": proxy_ip, "http": proxy_ip}
            try:
                page_df = get_fund_etf_page_df(current_page, proxies, page_number, time_out)
                if data_frame_util.is_not_empty(page_df):
                    local_frames.append(page_df)
                    logger.info("线程{}获取页面数据成功: {}", thread_id, current_page)
                    current_page += 1
                    failures = 0
                    continue
                # Empty page: back off briefly and rotate the proxy.
                time.sleep(0.2)
                proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
                logger.info("线程{}获取页面数据失败: {}", thread_id, current_page)
            except Exception as e:
                # Exception (not BaseException) so interpreter-exit signals
                # are never swallowed by the retry loop.
                time.sleep(1)
                proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
                logger.error("线程{}处理页面{}时发生错误: {}", thread_id, current_page, e)

            failures += 1
            if max_retries_per_page is not None and failures >= max_retries_per_page:
                # Give up on this page instead of spinning forever, e.g. when
                # max_number over-estimates the real number of pages.
                logger.error("线程{}放弃页面{}: 已连续失败{}次", thread_id, current_page, failures)
                current_page += 1
                failures = 0

        with frames_lock:
            collected_frames.extend(local_frames)
        return sum(len(frame) for frame in local_frames)

    # One (start, end, thread id) triple per worker; stop once the ranges
    # run past the last page.
    page_ranges = []
    for i in range(num_threads):
        start_page = i * pages_per_thread + 1
        if start_page > total_pages:
            break
        page_ranges.append((start_page, (i + 1) * pages_per_thread, i + 1))

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [
            executor.submit(process_page_range, start, end, tid)
            for start, end, tid in page_ranges
        ]
        # Surface worker failures in the log; data is delivered through
        # collected_frames, not through the future results.
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                logger.error("线程执行出错: {}", e)

    # Single concat at the end instead of re-copying the frame per page.
    result_df = pd.concat(collected_frames) if collected_frames else pd.DataFrame()
    return rename_etf(result_df)
461
+
462
+
463
def get_etf_real_time_quotes(time_out, pages_per_thread):
    """Fetch real-time ETF quotes through proxies using the threaded downloader.

    The total ETF count is requested up to three times, each time through a
    freshly generated proxy. If no positive count is obtained, 2000 is used
    as a fallback estimate.

    :param time_out: timeout forwarded to the HTTP helpers
    :param pages_per_thread: number of pages each worker thread handles
    :return: whatever ``repeated_acquisition_ask_etf_async`` returns
    """
    max_number = 0
    for _ in range(3):
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
        proxies = {"https": proxy_ip,
                   "http": proxy_ip}
        max_number = get_etf_count(1, proxies, 20, time_out)
        if max_number > 0:
            break

    if max_number <= 0:
        # Fixed: this was `max_number==2000`, a comparison with no effect,
        # so the fallback was never applied.
        # NOTE(review): 2000 is only an estimate; pages beyond the real total
        # rely on the downloader's own retry handling - confirm.
        max_number = 2000

    total_pages = (max_number + page_number - 1) // page_number  # ceiling division

    num_threads = int((total_pages / pages_per_thread) + 1)
    return repeated_acquisition_ask_etf_async(time_out, max_number, num_threads, pages_per_thread)
481
+
482
+
483
def get_etf_real_time_quotes_local_ip(time_out, max_retries_per_page=20):
    """Fetch real-time ETF quotes page by page without a proxy.

    The total ETF count is requested up to three times; if no positive count
    is obtained, 2000 is used as a fallback estimate. Pages are then
    downloaded sequentially.

    :param time_out: timeout forwarded to the HTTP helpers
    :param max_retries_per_page: failed attempts tolerated for one page before
        it is skipped; ``None`` retries without limit
    :return: result of ``rename_etf`` applied to the concatenated page frames
    """
    max_number = 0
    for _ in range(3):
        max_number = get_etf_count(1, None, 20, time_out)
        if max_number > 0:
            break
    if max_number <= 0:
        max_number = 2000
    total_pages = (max_number + page_number - 1) // page_number  # ceiling division

    page_frames = []
    for pn in range(1, total_pages + 1):
        failures = 0
        while True:
            page_df = None
            try:
                page_df = get_fund_etf_page_df(pn, None, page_number, time_out)
            except Exception as e:
                # Exception (not BaseException) so Ctrl-C can still stop
                # the loop.
                logger.error("同步A市场ETF信息失败:{},{}", e, pn)

            if page_df is not None and not data_frame_util.is_empty(page_df):
                page_frames.append(page_df)
                logger.info("同步A市场ETF第几{}页成功", pn)
                break

            failures += 1
            if max_retries_per_page is not None and failures >= max_retries_per_page:
                # Skip the page rather than loop forever, e.g. when the
                # fallback count over-estimates the number of pages.
                logger.error("同步A市场ETF第{}页放弃: 已连续失败{}次", pn, failures)
                break
            time.sleep(1)

    # Single concat at the end instead of re-copying the frame per page.
    results_df = pd.concat(page_frames) if page_frames else pd.DataFrame()
    return rename_etf(results_df)
368
508
 
369
509
 
370
510
  if __name__ == '__main__':
371
- fund_etf_df = get_etf_real_time_quotes(None)
372
- fund_etf_df = fund_etf_df.sort_values(by=['amount'], ascending=False)
373
- fund_etf_df = fund_etf_df.fillna(0)
374
- print(fund_etf_df)
511
+ test_df = get_etf_real_time_quotes_local_ip(30)
512
+ print(test_df)
@@ -1,11 +1,12 @@
1
1
  import sys
2
2
  import os
3
+ import time
3
4
 
4
5
  file_path = os.path.abspath(__file__)
5
6
  end = file_path.index('mns') + 16
6
7
  project_path = file_path[0:end]
7
8
  sys.path.append(project_path)
8
-
9
+ from mns_common.db.MongodbUtil import MongodbUtil
9
10
  import requests
10
11
  import json
11
12
  import pandas as pd
@@ -14,15 +15,20 @@ import datetime
14
15
  from loguru import logger
15
16
  import mns_common.utils.data_frame_util as data_frame_util
16
17
 
18
+ mongodb_util = MongodbUtil('27017')
19
# Comma-separated list of quote field codes requested from the endpoint.
# Fixed: a trailing comma after the closing parenthesis made this a
# one-element tuple instead of a string.
fields = ("f352,f2,f3,f5,f6,f8,f10,f11,f22,f12,f14,f15,f16,f17,"
          "f18,f20,f21,f26,f33,f34,f35,f62,f66,f69,f72,f100,f184,f211,f212")
# Market/board filter expression sent as the "fs" request value.
fs = "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048"
22
+
17
23
  # 最大返回条数
18
24
  max_number = 5800
19
25
  # 最小返回条数
20
26
  min_number = 5600
21
27
  # 分页条数
22
- page_number = 100
28
+ PAGE_SIZE = 100
23
29
 
24
30
 
25
- def get_stock_page_data(pn, fields, fs, proxies):
31
+ def get_stock_page_data(pn, proxies, page_size, time_out):
26
32
  """
27
33
  获取单页股票数据
28
34
  """
@@ -36,7 +42,7 @@ def get_stock_page_data(pn, fields, fs, proxies):
36
42
  params = {
37
43
  "cb": "jQuery1124046660442520420653_" + str(current_timestamp_ms),
38
44
  "pn": str(pn),
39
- "pz": "10000", # 每页最大200条
45
+ "pz": str(page_size), # 每页最大200条
40
46
  "po": "0",
41
47
  "np": "3",
42
48
  "ut": "bd1d9ddb04089700cf9c27f6f7426281",
@@ -50,21 +56,12 @@ def get_stock_page_data(pn, fields, fs, proxies):
50
56
  }
51
57
  try:
52
58
  if proxies is None:
53
- r = requests.get(url, params)
59
+ r = requests.get(url, params, timeout=time_out)
54
60
  else:
55
- r = requests.get(url, params, proxies=proxies)
61
+ r = requests.get(url, params, proxies=proxies, timeout=time_out)
56
62
 
57
63
  data_text = r.text
58
- if pn == 1:
59
- try:
60
- begin_index_total = data_text.index('"total":')
61
64
 
62
- end_index_total = data_text.index('"diff"')
63
- global max_number
64
- max_number = int(data_text[begin_index_total + 8:end_index_total - 1])
65
- except Exception as e:
66
- logger.error(f"获取第{pn}页股票列表异常: {e}")
67
- return pd.DataFrame()
68
65
 
69
66
  begin_index = data_text.index('[')
70
67
  end_index = data_text.index(']')
@@ -73,24 +70,26 @@ def get_stock_page_data(pn, fields, fs, proxies):
73
70
  if data_json is None:
74
71
  return pd.DataFrame()
75
72
  else:
76
- return pd.DataFrame(data_json)
73
+ result_df = pd.DataFrame(data_json)
74
+ result_df['page_number'] = pn
75
+ return result_df
77
76
  except Exception as e:
78
- logger.error(f"获取第{pn}页股票列表异常: {e}")
77
+ # logger.error("获取第{}页股票列表异常:{}", pn, str(e))
79
78
  return pd.DataFrame()
80
79
 
81
80
 
82
- def all_stock_ticker_data_new(fields, fs, proxies) -> pd.DataFrame:
81
+ def all_stock_ticker_data_new(proxies, time_out) -> pd.DataFrame:
83
82
  """
84
83
  使用多线程获取所有股票数据
85
84
  """
86
85
 
87
- per_page = page_number
86
+ per_page = PAGE_SIZE
88
87
  total_pages = (max_number + per_page - 1) // per_page # 向上取整
89
88
 
90
89
  # 创建线程池
91
90
  with ThreadPoolExecutor(max_workers=10) as executor:
92
91
  # 提交任务,获取每页数据
93
- futures = [executor.submit(get_stock_page_data, pn, fields, fs, proxies)
92
+ futures = [executor.submit(get_stock_page_data, pn, proxies, PAGE_SIZE, time_out)
94
93
  for pn in range(1, total_pages + 1)]
95
94
 
96
95
  # 收集结果
@@ -107,22 +106,11 @@ def all_stock_ticker_data_new(fields, fs, proxies) -> pd.DataFrame:
107
106
  return pd.DataFrame()
108
107
 
109
108
 
110
- def get_real_time_quotes_all_stocks(proxies):
111
- fields = ("f352,f2,f3,f5,f6,f8,f10,f11,f22,f12,f14,f15,f16,f17,"
112
- "f18,f20,f21,f26,f33,f34,f35,f62,f66,f69,f72,f100,f184,f211,f212"),
113
- fs = "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048"
114
- # 获取第一页数据
115
- page_one_df = get_stock_page_data(1, fields, fs, proxies)
116
- # 数据接口正常返回5600以上的数量
117
- if page_one_df.shape[0] > min_number:
118
- page_one_df = rename_real_time_quotes_df(page_one_df)
119
- page_one_df.drop_duplicates('symbol', keep='last', inplace=True)
120
- return page_one_df
121
- else:
122
- page_df = all_stock_ticker_data_new(fields, fs, proxies)
123
- page_df = rename_real_time_quotes_df(page_df)
124
- page_df.drop_duplicates('symbol', keep='last', inplace=True)
125
- return page_df
109
def get_real_time_quotes_all_stocks(proxies, time_out):
    """Download all stock quote pages, rename the columns and de-duplicate.

    :param proxies: proxy mapping forwarded to ``all_stock_ticker_data_new``
    :param time_out: timeout forwarded to ``all_stock_ticker_data_new``
    :return: renamed frame with one row per ``symbol`` (last occurrence kept)
    """
    quotes_df = rename_real_time_quotes_df(
        all_stock_ticker_data_new(proxies, time_out)
    )
    # In-place so the object returned is the one produced by the rename step.
    quotes_df.drop_duplicates('symbol', keep='last', inplace=True)
    return quotes_df
126
114
 
127
115
 
128
116
  # 获取所有股票实时行情数据 f33,委比
@@ -295,8 +283,19 @@ def get_sum_north_south_net_buy_amt():
295
283
  return df
296
284
 
297
285
 
286
+ import mns_common.component.proxies.proxy_common_api as proxy_common_api
287
+
298
288
  # 示例调用
299
289
if __name__ == "__main__":
    # Demo loop: fetch all quotes once per second through a fresh proxy.
    while True:
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
        # Fixed: the "ip为空" branch was an `else` on `while True`, which can
        # never run because the loop has no `break`.
        # NOTE(review): assumes generate_proxy_ip_api returns a falsy value
        # when no proxy is available - confirm.
        if not proxy_ip:
            time.sleep(1)
            logger.error("ip为空")
            continue

        proxy = {"https": proxy_ip}
        logger.info(proxy_ip)
        df = all_stock_ticker_data_new(proxy, 3)
        logger.info("数据条数,{}", df.shape[0])
        time.sleep(1)
@@ -7,11 +7,20 @@ project_path = file_path[0:end]
7
7
  sys.path.append(project_path)
8
8
 
9
9
  import requests
10
+
11
+ import mns_common.utils.data_frame_util as data_frame_util
10
12
  import json
11
- import pandas as pd
12
- from concurrent.futures import ThreadPoolExecutor
13
13
  import datetime
14
+ from concurrent.futures import ThreadPoolExecutor
15
+ import mns_common.component.proxies.proxy_common_api as proxy_common_api
14
16
  from loguru import logger
17
+ import concurrent.futures
18
+ import pandas as pd
19
+ import time
20
+ from concurrent.futures import ThreadPoolExecutor
21
+ from threading import Lock
22
+ import mns_common.api.em.real_time.real_time_quotes_repeat_api as real_time_quotes_repeat_api
23
+ import mns_common.api.em.real_time.east_money_stock_common_api as east_money_stock_common_api
15
24
 
16
25
  #
17
26
  # fields_02 = "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64,f65,f66,f67,f68,f69,f70,f71,f72,f73,f74,f75,f76,f77,f78,f79,f80,f81,f82,f83,f84,f85,f86,f87,f88,f89,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99,f100,f101,f102,f103,f104,f105,f106,f107,f108" \
@@ -20,6 +29,8 @@ from loguru import logger
20
29
  # ",f309,f310,f312,f313,f314,f315,f316,f317,f318,f319,f320,f321,f322,f323,f324,f325,f326,f327,f328,f329,f330,f331,f332,f333,f334,f335,f336,f337,f338,f339,f340,f341,f342,f343,f344,f345,f346,f347,f348,f349,f350,f351,f352,f353,f354,f355,f356,f357,f358,f359,f360,f361,f362,f363,f364,f365,f366,f367,f368,f369,f370,f371,f372,f373,f374,f375,f376,f377,f378,f379,f380,f381,f382,f383,f384,f385,f386,f387,f388,f389,f390,f391,f392,f393,f394,f395,f396,f397,f398,f399,f401"
21
30
 
22
31
 
32
+ fs = "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048"
33
+
23
34
  fields = ("f2,f3,f5,f6,f8,"
24
35
  "f9,f10,f22,f12,f13,"
25
36
  "f14,f15,f16,f17,f18,"
@@ -31,15 +42,11 @@ fields = ("f2,f3,f5,f6,f8,"
31
42
  "f84,f102,f184,f100,f103,"
32
43
  "f352,f191,f193,f24,f25")
33
44
 
34
- # 最大返回条数
35
- max_number = 5800
36
- # 最小返回条数
37
- min_number = 5600
38
45
  # 分页条数
39
- page_number = 100
46
+ PAGE_SIZE = 100
40
47
 
41
48
 
42
- def get_stock_page_data(pn, fields, fs, proxies):
49
+ def get_stock_page_data(pn, proxies, page_number, time_out):
43
50
  """
44
51
  获取单页股票数据
45
52
  """
@@ -53,7 +60,7 @@ def get_stock_page_data(pn, fields, fs, proxies):
53
60
  params = {
54
61
  "cb": "jQuery1124046660442520420653_" + str(current_timestamp_ms),
55
62
  "pn": str(pn),
56
- "pz": "10000", # 每页最大200条
63
+ "pz": str(page_number), # 每页最大200条
57
64
  "po": "1",
58
65
  "np": "3",
59
66
  "ut": "bd1d9ddb04089700cf9c27f6f7426281",
@@ -67,9 +74,9 @@ def get_stock_page_data(pn, fields, fs, proxies):
67
74
  }
68
75
  try:
69
76
  if proxies is None:
70
- r = requests.get(url, params)
77
+ r = requests.get(url, params, timeout=time_out)
71
78
  else:
72
- r = requests.get(url, params, proxies=proxies)
79
+ r = requests.get(url, params, proxies=proxies, timeout=time_out)
73
80
  data_text = r.text
74
81
  begin_index = data_text.index('[')
75
82
  end_index = data_text.index(']')
@@ -80,52 +87,72 @@ def get_stock_page_data(pn, fields, fs, proxies):
80
87
  else:
81
88
  return pd.DataFrame(data_json)
82
89
  except Exception as e:
83
- logger.error(f"获取第{pn}页股票列表异常: {e}")
84
90
  return pd.DataFrame()
85
91
 
86
92
 
87
- def all_stock_ticker_data_new(fields, fs, proxies) -> pd.DataFrame:
88
- """
89
- 使用多线程获取所有股票数据
93
def all_stock_ticker_data_new(initial_proxies, time_out, max_number) -> pd.DataFrame:
    """Fetch all stock quote pages with a thread pool, retrying failed pages.

    The first round uses ``initial_proxies``. Every later round requests a
    new proxy from ``proxy_common_api`` and only re-fetches the pages that
    have not succeeded yet. At most ``MAX_IP_LIMIT`` (10) proxies are used,
    the initial one included; whatever was collected by then is returned.

    :param initial_proxies: proxy mapping used for the first round
    :param time_out: timeout forwarded to ``get_stock_page_data``
    :param max_number: expected total number of rows, used to derive the
        number of pages at ``PAGE_SIZE`` rows per page
    :return: concatenation of all successfully fetched pages, or an empty
        DataFrame when nothing was fetched
    """
    total_pages = (max_number + PAGE_SIZE - 1) // PAGE_SIZE  # ceiling division
    pending_pages = set(range(1, total_pages + 1))  # pages not fetched yet
    results = []  # frames of successfully fetched pages
    used_ip_count = 1  # the initial proxy counts as the first one
    MAX_IP_LIMIT = 10
    proxies = initial_proxies
    first_round = True

    while pending_pages and used_ip_count < MAX_IP_LIMIT:
        if used_ip_count > 1:
            logger.info("当前需要处理的失败页码: {}, 已使用IP数量: {}/{}", pending_pages, used_ip_count,
                        MAX_IP_LIMIT)

        if first_round:
            first_round = False
        else:
            # Fixed: the proxy used to be rotated only once at least one page
            # had succeeded, so a dead initial proxy made this loop spin
            # forever without ever increasing used_ip_count.
            new_proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
            proxies = {"https": new_proxy_ip}
            used_ip_count += 1

        # Fetch only the pages that are still missing.
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = {
                executor.submit(get_stock_page_data, pn, proxies, PAGE_SIZE, time_out): pn
                for pn in sorted(pending_pages)
            }

            for future, pn in futures.items():
                try:
                    result = future.result()
                except Exception:
                    # Left unlogged as in the original; the page stays
                    # pending and is retried with the next proxy.
                    continue
                if not result.empty:
                    results.append(result)
                    pending_pages.discard(pn)

    if used_ip_count >= MAX_IP_LIMIT and pending_pages:
        logger.warning("已达到最大IP使用限制({}个),剩余未获取页码: {}, 返回现有数据", MAX_IP_LIMIT, pending_pages)

    if results:
        return pd.concat(results, ignore_index=True)
    else:
        return pd.DataFrame()
113
154
 
114
155
 
115
- def get_all_real_time_quotes(proxies):
116
- fs = "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048"
117
- # 获取第一页数据
118
- page_one_df = get_stock_page_data(1, fields, fs, proxies)
119
- # 数据接口正常返回5600以上的数量
120
- if page_one_df.shape[0] > min_number:
121
- page_one_df = rename_real_time_quotes_df(page_one_df)
122
- return page_one_df
123
- else:
124
- page_df = all_stock_ticker_data_new(fields, fs, proxies)
125
- page_df = rename_real_time_quotes_df(page_df)
126
- return page_df
127
-
128
-
129
156
  # 获取所有股票实时行情数据 f33,委比
130
157
  def rename_real_time_quotes_df(temp_df):
131
158
  temp_df = temp_df.rename(columns={
@@ -286,11 +313,28 @@ def rename_real_time_quotes_df(temp_df):
286
313
  return temp_df
287
314
 
288
315
 
316
def get_stock_real_time_quotes(time_out):
    """Fetch real-time quotes for all stocks through proxies.

    The total stock count is requested up to three times, each time through
    a freshly generated proxy. If the count is still 0 an empty DataFrame is
    returned; otherwise all pages are downloaded starting with the proxy of
    the last count request, the columns are renamed and duplicate ``symbol``
    rows are dropped (last occurrence kept).

    :param time_out: timeout forwarded to the HTTP helpers
    :return: renamed, de-duplicated quote frame, or an empty DataFrame
    """
    for _attempt in range(3):
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
        initial_proxies = {"https": proxy_ip,
                           "http": proxy_ip}
        total_number = east_money_stock_common_api.get_stocks_num(1, initial_proxies, 20, time_out)
        if total_number > 0:
            break

    if total_number == 0:
        return pd.DataFrame()

    quotes_df = rename_real_time_quotes_df(
        all_stock_ticker_data_new(initial_proxies, time_out, total_number)
    )
    quotes_df.drop_duplicates('symbol', keep='last', inplace=True)
    return quotes_df
334
+
335
+
289
336
  # 示例调用
290
337
if __name__ == "__main__":
    # Demo: fetch all quotes once and report how many rows have wei_bi == 100.
    df = get_stock_real_time_quotes(5)
    if df.empty:
        # get_stock_real_time_quotes returns an empty frame when the stock
        # count cannot be obtained; indexing 'wei_bi' would then raise.
        logger.error("未获取到实时行情数据")
    else:
        zt_df = df.loc[df['wei_bi'] == 100]
        # Fixed: the message had no "{}" placeholder, so the count was never
        # rendered in the log line.
        logger.info("同步次数:{}", zt_df.shape[0])