mns-common 1.3.9.2__py3-none-any.whl → 1.6.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mns-common might be problematic. Click here for more details.
- mns_common/__init__.py +1 -0
- mns_common/api/akshare/__init__.py +0 -1
- mns_common/api/akshare/k_line_api.py +20 -82
- mns_common/api/akshare/stock_bid_ask_api.py +21 -14
- mns_common/api/akshare/stock_zb_pool.py +2 -0
- mns_common/api/akshare/stock_zt_pool_api.py +1 -1
- mns_common/api/em/gd/east_money_stock_gdfx_free_top_10_api.py +62 -7
- mns_common/api/em/real_time/__init__.py +1 -1
- mns_common/api/em/real_time/east_money_debt_api.py +168 -71
- mns_common/api/em/real_time/east_money_etf_api.py +165 -27
- mns_common/api/em/real_time/east_money_stock_a_api.py +37 -38
- mns_common/api/em/real_time/east_money_stock_a_v2_api.py +97 -53
- mns_common/api/em/real_time/east_money_stock_common_api.py +174 -0
- mns_common/api/em/real_time/east_money_stock_hk_api.py +252 -271
- mns_common/api/em/real_time/east_money_stock_hk_gtt_api.py +291 -0
- mns_common/api/em/real_time/east_money_stock_multi_thread_api_v3.py +154 -0
- mns_common/api/em/real_time/east_money_stock_us_api.py +210 -82
- mns_common/api/em/real_time/real_time_quotes_repeat_api.py +195 -0
- mns_common/api/foreign_exchange/foreign_exchange_api.py +38 -0
- mns_common/api/k_line/stock_k_line_data_api.py +11 -1
- mns_common/api/kpl/common/__init__.py +3 -2
- mns_common/api/kpl/common/kpl_common_api.py +35 -0
- mns_common/api/kpl/symbol/symbol_his_quotes_api.py +1 -1
- mns_common/api/kpl/theme/kpl_theme_api.py +69 -0
- mns_common/api/kpl/yidong/__init__.py +7 -0
- mns_common/api/kpl/yidong/stock_bid_yi_dong_api.py +52 -0
- mns_common/api/proxies/liu_guan_proxy_api.py +55 -5
- mns_common/api/ths/company/company_product_area_industry_index_query.py +46 -0
- mns_common/api/ths/company/ths_company_info_api.py +2 -1
- mns_common/api/ths/company/ths_company_info_web.py +159 -0
- mns_common/api/ths/concept/app/ths_concept_index_app.py +3 -1
- mns_common/api/ths/wen_cai/ths_wen_cai_api.py +1 -1
- mns_common/api/ths/zt/ths_stock_zt_pool_api.py +20 -1
- mns_common/api/ths/zt/ths_stock_zt_pool_v2_api.py +105 -29
- mns_common/api/ths/zt/ths_stock_zt_reason_web_api.py +100 -0
- mns_common/api/us/ths_us_company_info_api.py +131 -0
- mns_common/api/xueqiu/xue_qiu_k_line_api.py +31 -23
- mns_common/component/common_service_fun_api.py +28 -8
- mns_common/component/company/company_common_service_new_api.py +2 -0
- mns_common/component/cookie/cookie_enum.py +16 -0
- mns_common/component/cookie/cookie_info_service.py +18 -8
- mns_common/component/data/data_init_api.py +13 -8
- mns_common/component/deal/deal_service_api.py +70 -8
- mns_common/component/deal/deal_service_v2_api.py +167 -0
- mns_common/component/em/em_stock_info_api.py +12 -3
- mns_common/component/exception/ExceptionMonitor.py +86 -0
- mns_common/component/exception/__init__.py +7 -0
- mns_common/component/main_line/__init__.py +7 -0
- mns_common/component/main_line/main_line_zt_reason_service.py +257 -0
- mns_common/component/proxies/proxy_common_api.py +199 -31
- mns_common/component/tfp/stock_tfp_api.py +82 -12
- mns_common/component/us/__init__.py +7 -0
- mns_common/component/us/us_stock_etf_info_api.py +130 -0
- mns_common/constant/db_name_constant.py +75 -26
- mns_common/constant/extra_income_db_name.py +97 -11
- mns_common/constant/strategy_classify.py +72 -0
- mns_common/db/MongodbUtil.py +3 -0
- mns_common/db/MongodbUtilLocal.py +3 -0
- {mns_common-1.3.9.2.dist-info → mns_common-1.6.1.4.dist-info}/METADATA +1 -1
- {mns_common-1.3.9.2.dist-info → mns_common-1.6.1.4.dist-info}/RECORD +64 -47
- mns_common/api/ths/concept/web/ths_company_info_web.py +0 -163
- mns_common/component/qmt/qmt_buy_service.py +0 -172
- mns_common/component/task/real_time_data_sync_check.py +0 -97
- /mns_common/{component/qmt → api/foreign_exchange}/__init__.py +0 -0
- /mns_common/{component/task → api/kpl/theme}/__init__.py +0 -0
- {mns_common-1.3.9.2.dist-info → mns_common-1.6.1.4.dist-info}/WHEEL +0 -0
- {mns_common-1.3.9.2.dist-info → mns_common-1.6.1.4.dist-info}/top_level.txt +0 -0
|
@@ -6,22 +6,22 @@ end = file_path.index('mns') + 16
|
|
|
6
6
|
project_path = file_path[0:end]
|
|
7
7
|
sys.path.append(project_path)
|
|
8
8
|
|
|
9
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
10
9
|
import pandas as pd
|
|
11
10
|
from loguru import logger
|
|
12
11
|
import requests
|
|
13
12
|
import time
|
|
14
13
|
import numpy as np
|
|
14
|
+
import mns_common.component.proxies.proxy_common_api as proxy_common_api
|
|
15
|
+
import concurrent.futures
|
|
16
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
17
|
+
from threading import Lock
|
|
18
|
+
import mns_common.utils.data_frame_util as data_frame_util
|
|
15
19
|
|
|
16
|
-
# 最大返回条数
|
|
17
|
-
max_number = 1200
|
|
18
|
-
# 最小返回条数
|
|
19
|
-
min_number = 1000
|
|
20
20
|
# 分页条数
|
|
21
21
|
page_number = 100
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
def
|
|
24
|
+
def get_etf_count(pn, proxies, page_size, time_out):
|
|
25
25
|
"""
|
|
26
26
|
东方财富-ETF 实时行情
|
|
27
27
|
https://quote.eastmoney.com/center/gridlist.html#fund_etf
|
|
@@ -32,7 +32,7 @@ def get_fund_etf_page_df(pn, proxies) -> pd.DataFrame:
|
|
|
32
32
|
url = "https://88.push2.eastmoney.com/api/qt/clist/get"
|
|
33
33
|
params = {
|
|
34
34
|
"pn": str(pn),
|
|
35
|
-
"pz":
|
|
35
|
+
"pz": str(page_size),
|
|
36
36
|
"po": "1",
|
|
37
37
|
"np": "3",
|
|
38
38
|
"ut": "bd1d9ddb04089700cf9c27f6f7426281",
|
|
@@ -53,16 +53,59 @@ def get_fund_etf_page_df(pn, proxies) -> pd.DataFrame:
|
|
|
53
53
|
}
|
|
54
54
|
try:
|
|
55
55
|
if proxies is None:
|
|
56
|
-
r = requests.get(url, params)
|
|
56
|
+
r = requests.get(url, params, timeout=time_out)
|
|
57
57
|
else:
|
|
58
|
-
r = requests.get(url, params, proxies=proxies)
|
|
58
|
+
r = requests.get(url, params, proxies=proxies, timeout=time_out)
|
|
59
|
+
data_json = r.json()
|
|
60
|
+
total_number = int(data_json['data']['total'])
|
|
61
|
+
return total_number
|
|
62
|
+
except Exception as e:
|
|
63
|
+
logger.error("获取ETF列表,实时行情异常:{}", e)
|
|
64
|
+
return 0
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def get_fund_etf_page_df(pn, proxies, page_size, time_out) -> pd.DataFrame:
|
|
68
|
+
"""
|
|
69
|
+
东方财富-ETF 实时行情
|
|
70
|
+
https://quote.eastmoney.com/center/gridlist.html#fund_etf
|
|
71
|
+
:return: ETF 实时行情
|
|
72
|
+
:rtype: pandas.DataFrame
|
|
73
|
+
"""
|
|
74
|
+
current_timestamp = str(int(round(time.time() * 1000, 0)))
|
|
75
|
+
url = "https://88.push2.eastmoney.com/api/qt/clist/get"
|
|
76
|
+
params = {
|
|
77
|
+
"pn": str(pn),
|
|
78
|
+
"pz": str(page_size),
|
|
79
|
+
"po": "1",
|
|
80
|
+
"np": "3",
|
|
81
|
+
"ut": "bd1d9ddb04089700cf9c27f6f7426281",
|
|
82
|
+
"fltt": "2",
|
|
83
|
+
"invt": "2",
|
|
84
|
+
"wbp2u": "|0|0|0|web",
|
|
85
|
+
"fid": "f12",
|
|
86
|
+
"fs": "b:MK0021,b:MK0022,b:MK0023,b:MK0024",
|
|
87
|
+
"fields": (
|
|
88
|
+
"f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,"
|
|
89
|
+
"f12,f13,f14,f15,f16,f17,f18,f20,f21,"
|
|
90
|
+
"f23,f24,f25,f26,f22,f11,f30,f31,f32,f33,"
|
|
91
|
+
"f34,f35,f38,f62,f63,f64,f65,f66,f69,"
|
|
92
|
+
"f72,f75,f78,f81,f84,f87,f115,f124,f128,"
|
|
93
|
+
"f136,f152,f184,f297,f402,f441"
|
|
94
|
+
),
|
|
95
|
+
"_": str(current_timestamp),
|
|
96
|
+
}
|
|
97
|
+
try:
|
|
98
|
+
if proxies is None:
|
|
99
|
+
r = requests.get(url, params, timeout=time_out)
|
|
100
|
+
else:
|
|
101
|
+
r = requests.get(url, params, proxies=proxies, timeout=time_out)
|
|
59
102
|
data_json = r.json()
|
|
60
103
|
if pn == 1:
|
|
61
104
|
try:
|
|
62
105
|
global max_number
|
|
63
106
|
max_number = int(data_json['data']['total'])
|
|
64
107
|
except Exception as e:
|
|
65
|
-
logger.error(
|
|
108
|
+
logger.error("获取第{}页ETF列表异常:{}", page_size, str(e))
|
|
66
109
|
return pd.DataFrame()
|
|
67
110
|
|
|
68
111
|
temp_df = pd.DataFrame(data_json["data"]["diff"])
|
|
@@ -352,23 +395,118 @@ def rename_etf(fund_etf_spot_em_df):
|
|
|
352
395
|
return fund_etf_spot_em_df
|
|
353
396
|
|
|
354
397
|
|
|
355
|
-
def
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
398
|
+
def repeated_acquisition_ask_etf_async(time_out, max_number, num_threads, pages_per_thread):
    """Fetch every ETF quote page concurrently and return the renamed result.

    The number of pages is derived from ``max_number`` using the module-level
    ``page_number`` rows per page. Pages are split into contiguous ranges of
    ``pages_per_thread`` pages, one range per worker thread. A page that comes
    back empty or raises is retried with a freshly generated proxy IP.

    :param time_out: timeout forwarded to ``get_fund_etf_page_df``
    :param max_number: expected total number of ETF rows
    :param num_threads: upper bound on the number of page ranges / workers
    :param pages_per_thread: number of pages assigned to each worker
    :return: ``rename_etf`` applied to the concatenation of all fetched pages
    """
    total_pages = (max_number + page_number - 1) // page_number  # ceiling division

    def process_page_range(start_page, end_page, thread_id):
        """Fetch pages start_page..end_page (capped at total_pages) as a list of frames."""
        page_frames = []
        current_page = start_page
        last_page = min(end_page, total_pages)
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)

        # NOTE(review): a failing page is retried without any upper bound, so a
        # page that can never succeed keeps this worker alive forever - confirm
        # whether a retry cap is wanted.
        while current_page <= last_page:
            proxies = {"https": proxy_ip, "http": proxy_ip}
            try:
                page_df = get_fund_etf_page_df(current_page, proxies, page_number, time_out)
                if data_frame_util.is_not_empty(page_df):
                    page_frames.append(page_df)
                    logger.info("线程{}获取页面数据成功: {}", thread_id, current_page)
                    current_page += 1
                else:
                    # Empty page: back off briefly and switch to a new proxy.
                    time.sleep(0.2)
                    proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
                    logger.info("线程{}获取页面数据失败: {}", thread_id, current_page)
            except Exception as e:
                # Exception (not BaseException) so SystemExit/KeyboardInterrupt
                # are not swallowed by the retry loop.
                time.sleep(1)
                proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
                logger.error("线程{}处理页面{}时发生错误: {}", thread_id, current_page, e)

        return page_frames

    # Build (start_page, end_page, thread_id) triples; stop once past the last page.
    page_ranges = []
    for i in range(num_threads):
        start_page = i * pages_per_thread + 1
        if start_page > total_pages:
            break
        page_ranges.append((start_page, (i + 1) * pages_per_thread, i + 1))

    all_frames = []
    if page_ranges:
        with concurrent.futures.ThreadPoolExecutor(max_workers=len(page_ranges)) as executor:
            futures = [
                executor.submit(process_page_range, first, last, tid)
                for first, last, tid in page_ranges
            ]
            # Collect in submission order so rows come out in page order
            # regardless of which worker finishes first.
            for future in futures:
                try:
                    all_frames.extend(future.result())
                except Exception as e:
                    logger.error("线程执行出错: {}", e)

    # Concatenate once instead of growing a frame page by page.
    result_df = pd.concat(all_frames) if all_frames else pd.DataFrame()
    return rename_etf(result_df)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
def get_etf_real_time_quotes(time_out, pages_per_thread):
    """Fetch all ETF real-time quotes through proxy IPs using worker threads.

    The total ETF count is looked up first (up to three attempts, each with a
    newly generated proxy IP). If every attempt reports 0, a fallback count of
    2000 is used, the same fallback as ``get_etf_real_time_quotes_local_ip``.

    :param time_out: timeout forwarded to the count and page requests
    :param pages_per_thread: number of pages each worker thread should fetch
    :return: whatever ``repeated_acquisition_ask_etf_async`` returns
    """
    max_number = 0
    for _ in range(3):
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
        proxies = {"https": proxy_ip, "http": proxy_ip}
        max_number = get_etf_count(1, proxies, 20, time_out)
        if max_number > 0:
            break

    if max_number == 0:
        # Was `max_number==2000`, a comparison with no effect, which left the
        # count at 0 and produced zero pages to fetch.
        max_number = 2000

    total_pages = (max_number + page_number - 1) // page_number  # ceiling division

    num_threads = int((total_pages / pages_per_thread) + 1)
    return repeated_acquisition_ask_etf_async(time_out, max_number, num_threads, pages_per_thread)
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def get_etf_real_time_quotes_local_ip(time_out):
    """Fetch all ETF real-time quotes sequentially without a proxy.

    The total ETF count is looked up first (up to three attempts); if every
    attempt reports 0, a fallback count of 2000 is used. Pages are then
    fetched one at a time, and an empty page is re-requested until it returns
    data.

    :param time_out: timeout forwarded to the count and page requests
    :return: ``rename_etf`` applied to the concatenation of all fetched pages
    """
    max_number = 0
    for _ in range(3):
        max_number = get_etf_count(1, None, 20, time_out)
        if max_number > 0:
            break
    if max_number == 0:
        max_number = 2000
    total_pages = (max_number + page_number - 1) // page_number  # ceiling division

    page_frames = []
    pn = 1
    while pn <= total_pages:
        try:
            page_df = get_fund_etf_page_df(pn, None, page_number, time_out)
            # NOTE(review): retried without limit; a page that never returns
            # data (for example one past the real total when the 2000 fallback
            # is in effect) would loop forever - confirm whether a cap is wanted.
            while data_frame_util.is_empty(page_df):
                time.sleep(1)  # back off before asking again
                page_df = get_fund_etf_page_df(pn, None, page_number, time_out)
            page_frames.append(page_df)
            logger.info("同步A市场ETF第几{}页成功", pn)
            pn = pn + 1
        except Exception as e:
            # Exception (not BaseException) so Ctrl-C can still stop the loop;
            # sleep so a persistent error does not turn into a busy loop.
            logger.error("同步A市场ETF信息失败:{},{}", e, pn)
            time.sleep(1)

    # Concatenate once instead of growing a frame page by page.
    results_df = pd.concat(page_frames) if page_frames else pd.DataFrame()
    return rename_etf(results_df)
|
|
368
508
|
|
|
369
509
|
|
|
370
510
|
if __name__ == '__main__':
    # Manual smoke run: fetch ETF quotes over the local IP with a 30 timeout
    # and print the resulting frame.
    print(get_etf_real_time_quotes_local_ip(30))
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
import os
|
|
3
|
+
import time
|
|
3
4
|
|
|
4
5
|
file_path = os.path.abspath(__file__)
|
|
5
6
|
end = file_path.index('mns') + 16
|
|
6
7
|
project_path = file_path[0:end]
|
|
7
8
|
sys.path.append(project_path)
|
|
8
|
-
|
|
9
|
+
from mns_common.db.MongodbUtil import MongodbUtil
|
|
9
10
|
import requests
|
|
10
11
|
import json
|
|
11
12
|
import pandas as pd
|
|
@@ -14,15 +15,20 @@ import datetime
|
|
|
14
15
|
from loguru import logger
|
|
15
16
|
import mns_common.utils.data_frame_util as data_frame_util
|
|
16
17
|
|
|
18
|
+
mongodb_util = MongodbUtil('27017')
|
|
19
|
+
# Comma-separated field codes; presumably sent as the `fields` query parameter
# of the quote request - verify against get_stock_page_data.
# A trailing comma after the closing parenthesis previously turned this into a
# one-element tuple instead of a string.
fields = ("f352,f2,f3,f5,f6,f8,f10,f11,f22,f12,f14,f15,f16,f17,"
          "f18,f20,f21,f26,f33,f34,f35,f62,f66,f69,f72,f100,f184,f211,f212")
# Market filter expression; presumably sent as the `fs` query parameter - verify.
fs = "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048"
|
|
22
|
+
|
|
17
23
|
# 最大返回条数
|
|
18
24
|
max_number = 5800
|
|
19
25
|
# 最小返回条数
|
|
20
26
|
min_number = 5600
|
|
21
27
|
# 分页条数
|
|
22
|
-
|
|
28
|
+
PAGE_SIZE = 100
|
|
23
29
|
|
|
24
30
|
|
|
25
|
-
def get_stock_page_data(pn,
|
|
31
|
+
def get_stock_page_data(pn, proxies, page_size, time_out):
|
|
26
32
|
"""
|
|
27
33
|
获取单页股票数据
|
|
28
34
|
"""
|
|
@@ -36,7 +42,7 @@ def get_stock_page_data(pn, fields, fs, proxies):
|
|
|
36
42
|
params = {
|
|
37
43
|
"cb": "jQuery1124046660442520420653_" + str(current_timestamp_ms),
|
|
38
44
|
"pn": str(pn),
|
|
39
|
-
"pz":
|
|
45
|
+
"pz": str(page_size), # 每页最大200条
|
|
40
46
|
"po": "0",
|
|
41
47
|
"np": "3",
|
|
42
48
|
"ut": "bd1d9ddb04089700cf9c27f6f7426281",
|
|
@@ -50,21 +56,12 @@ def get_stock_page_data(pn, fields, fs, proxies):
|
|
|
50
56
|
}
|
|
51
57
|
try:
|
|
52
58
|
if proxies is None:
|
|
53
|
-
r = requests.get(url, params)
|
|
59
|
+
r = requests.get(url, params, timeout=time_out)
|
|
54
60
|
else:
|
|
55
|
-
r = requests.get(url, params, proxies=proxies)
|
|
61
|
+
r = requests.get(url, params, proxies=proxies, timeout=time_out)
|
|
56
62
|
|
|
57
63
|
data_text = r.text
|
|
58
|
-
if pn == 1:
|
|
59
|
-
try:
|
|
60
|
-
begin_index_total = data_text.index('"total":')
|
|
61
64
|
|
|
62
|
-
end_index_total = data_text.index('"diff"')
|
|
63
|
-
global max_number
|
|
64
|
-
max_number = int(data_text[begin_index_total + 8:end_index_total - 1])
|
|
65
|
-
except Exception as e:
|
|
66
|
-
logger.error(f"获取第{pn}页股票列表异常: {e}")
|
|
67
|
-
return pd.DataFrame()
|
|
68
65
|
|
|
69
66
|
begin_index = data_text.index('[')
|
|
70
67
|
end_index = data_text.index(']')
|
|
@@ -73,24 +70,26 @@ def get_stock_page_data(pn, fields, fs, proxies):
|
|
|
73
70
|
if data_json is None:
|
|
74
71
|
return pd.DataFrame()
|
|
75
72
|
else:
|
|
76
|
-
|
|
73
|
+
result_df = pd.DataFrame(data_json)
|
|
74
|
+
result_df['page_number'] = pn
|
|
75
|
+
return result_df
|
|
77
76
|
except Exception as e:
|
|
78
|
-
logger.error(
|
|
77
|
+
# logger.error("获取第{}页股票列表异常:{}", pn, str(e))
|
|
79
78
|
return pd.DataFrame()
|
|
80
79
|
|
|
81
80
|
|
|
82
|
-
def all_stock_ticker_data_new(
|
|
81
|
+
def all_stock_ticker_data_new(proxies, time_out) -> pd.DataFrame:
|
|
83
82
|
"""
|
|
84
83
|
使用多线程获取所有股票数据
|
|
85
84
|
"""
|
|
86
85
|
|
|
87
|
-
per_page =
|
|
86
|
+
per_page = PAGE_SIZE
|
|
88
87
|
total_pages = (max_number + per_page - 1) // per_page # 向上取整
|
|
89
88
|
|
|
90
89
|
# 创建线程池
|
|
91
90
|
with ThreadPoolExecutor(max_workers=10) as executor:
|
|
92
91
|
# 提交任务,获取每页数据
|
|
93
|
-
futures = [executor.submit(get_stock_page_data, pn,
|
|
92
|
+
futures = [executor.submit(get_stock_page_data, pn, proxies, PAGE_SIZE, time_out)
|
|
94
93
|
for pn in range(1, total_pages + 1)]
|
|
95
94
|
|
|
96
95
|
# 收集结果
|
|
@@ -107,22 +106,11 @@ def all_stock_ticker_data_new(fields, fs, proxies) -> pd.DataFrame:
|
|
|
107
106
|
return pd.DataFrame()
|
|
108
107
|
|
|
109
108
|
|
|
110
|
-
def get_real_time_quotes_all_stocks(proxies):
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
page_one_df = get_stock_page_data(1, fields, fs, proxies)
|
|
116
|
-
# 数据接口正常返回5600以上的数量
|
|
117
|
-
if page_one_df.shape[0] > min_number:
|
|
118
|
-
page_one_df = rename_real_time_quotes_df(page_one_df)
|
|
119
|
-
page_one_df.drop_duplicates('symbol', keep='last', inplace=True)
|
|
120
|
-
return page_one_df
|
|
121
|
-
else:
|
|
122
|
-
page_df = all_stock_ticker_data_new(fields, fs, proxies)
|
|
123
|
-
page_df = rename_real_time_quotes_df(page_df)
|
|
124
|
-
page_df.drop_duplicates('symbol', keep='last', inplace=True)
|
|
125
|
-
return page_df
|
|
109
|
+
def get_real_time_quotes_all_stocks(proxies, time_out):
    """Fetch all stock quote pages and return one row per symbol.

    The pages collected by ``all_stock_ticker_data_new`` are passed through
    ``rename_real_time_quotes_df``; duplicate ``symbol`` rows are then dropped,
    keeping the last occurrence.

    :param proxies: proxy mapping forwarded to the page requests (may be None)
    :param time_out: timeout forwarded to the page requests
    :return: the renamed, de-duplicated DataFrame
    """
    quotes_df = rename_real_time_quotes_df(
        all_stock_ticker_data_new(proxies, time_out)
    )
    quotes_df.drop_duplicates(subset='symbol', keep='last', inplace=True)
    return quotes_df
|
|
126
114
|
|
|
127
115
|
|
|
128
116
|
# 获取所有股票实时行情数据 f33,委比
|
|
@@ -295,8 +283,19 @@ def get_sum_north_south_net_buy_amt():
|
|
|
295
283
|
return df
|
|
296
284
|
|
|
297
285
|
|
|
286
|
+
import mns_common.component.proxies.proxy_common_api as proxy_common_api
|
|
287
|
+
|
|
298
288
|
# 示例调用
|
|
299
289
|
if __name__ == "__main__":
|
|
290
|
+
|
|
300
291
|
while True:
|
|
301
|
-
|
|
302
|
-
|
|
292
|
+
proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
|
|
293
|
+
proxy = {"https": proxy_ip}
|
|
294
|
+
logger.info(proxy_ip)
|
|
295
|
+
df = all_stock_ticker_data_new(proxy, 3)
|
|
296
|
+
logger.info("数据条数,{}", df.shape[0])
|
|
297
|
+
time.sleep(1)
|
|
298
|
+
|
|
299
|
+
else:
|
|
300
|
+
time.sleep(1)
|
|
301
|
+
logger.error("ip为空")
|
|
@@ -7,11 +7,20 @@ project_path = file_path[0:end]
|
|
|
7
7
|
sys.path.append(project_path)
|
|
8
8
|
|
|
9
9
|
import requests
|
|
10
|
+
|
|
11
|
+
import mns_common.utils.data_frame_util as data_frame_util
|
|
10
12
|
import json
|
|
11
|
-
import pandas as pd
|
|
12
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
13
13
|
import datetime
|
|
14
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
15
|
+
import mns_common.component.proxies.proxy_common_api as proxy_common_api
|
|
14
16
|
from loguru import logger
|
|
17
|
+
import concurrent.futures
|
|
18
|
+
import pandas as pd
|
|
19
|
+
import time
|
|
20
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
21
|
+
from threading import Lock
|
|
22
|
+
import mns_common.api.em.real_time.real_time_quotes_repeat_api as real_time_quotes_repeat_api
|
|
23
|
+
import mns_common.api.em.real_time.east_money_stock_common_api as east_money_stock_common_api
|
|
15
24
|
|
|
16
25
|
#
|
|
17
26
|
# fields_02 = "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64,f65,f66,f67,f68,f69,f70,f71,f72,f73,f74,f75,f76,f77,f78,f79,f80,f81,f82,f83,f84,f85,f86,f87,f88,f89,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99,f100,f101,f102,f103,f104,f105,f106,f107,f108" \
|
|
@@ -20,6 +29,8 @@ from loguru import logger
|
|
|
20
29
|
# ",f309,f310,f312,f313,f314,f315,f316,f317,f318,f319,f320,f321,f322,f323,f324,f325,f326,f327,f328,f329,f330,f331,f332,f333,f334,f335,f336,f337,f338,f339,f340,f341,f342,f343,f344,f345,f346,f347,f348,f349,f350,f351,f352,f353,f354,f355,f356,f357,f358,f359,f360,f361,f362,f363,f364,f365,f366,f367,f368,f369,f370,f371,f372,f373,f374,f375,f376,f377,f378,f379,f380,f381,f382,f383,f384,f385,f386,f387,f388,f389,f390,f391,f392,f393,f394,f395,f396,f397,f398,f399,f401"
|
|
21
30
|
|
|
22
31
|
|
|
32
|
+
fs = "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048"
|
|
33
|
+
|
|
23
34
|
fields = ("f2,f3,f5,f6,f8,"
|
|
24
35
|
"f9,f10,f22,f12,f13,"
|
|
25
36
|
"f14,f15,f16,f17,f18,"
|
|
@@ -31,15 +42,11 @@ fields = ("f2,f3,f5,f6,f8,"
|
|
|
31
42
|
"f84,f102,f184,f100,f103,"
|
|
32
43
|
"f352,f191,f193,f24,f25")
|
|
33
44
|
|
|
34
|
-
# 最大返回条数
|
|
35
|
-
max_number = 5800
|
|
36
|
-
# 最小返回条数
|
|
37
|
-
min_number = 5600
|
|
38
45
|
# 分页条数
|
|
39
|
-
|
|
46
|
+
PAGE_SIZE = 100
|
|
40
47
|
|
|
41
48
|
|
|
42
|
-
def get_stock_page_data(pn,
|
|
49
|
+
def get_stock_page_data(pn, proxies, page_number, time_out):
|
|
43
50
|
"""
|
|
44
51
|
获取单页股票数据
|
|
45
52
|
"""
|
|
@@ -53,7 +60,7 @@ def get_stock_page_data(pn, fields, fs, proxies):
|
|
|
53
60
|
params = {
|
|
54
61
|
"cb": "jQuery1124046660442520420653_" + str(current_timestamp_ms),
|
|
55
62
|
"pn": str(pn),
|
|
56
|
-
"pz":
|
|
63
|
+
"pz": str(page_number), # 每页最大200条
|
|
57
64
|
"po": "1",
|
|
58
65
|
"np": "3",
|
|
59
66
|
"ut": "bd1d9ddb04089700cf9c27f6f7426281",
|
|
@@ -67,9 +74,9 @@ def get_stock_page_data(pn, fields, fs, proxies):
|
|
|
67
74
|
}
|
|
68
75
|
try:
|
|
69
76
|
if proxies is None:
|
|
70
|
-
r = requests.get(url, params)
|
|
77
|
+
r = requests.get(url, params, timeout=time_out)
|
|
71
78
|
else:
|
|
72
|
-
r = requests.get(url, params, proxies=proxies)
|
|
79
|
+
r = requests.get(url, params, proxies=proxies, timeout=time_out)
|
|
73
80
|
data_text = r.text
|
|
74
81
|
begin_index = data_text.index('[')
|
|
75
82
|
end_index = data_text.index(']')
|
|
@@ -80,52 +87,72 @@ def get_stock_page_data(pn, fields, fs, proxies):
|
|
|
80
87
|
else:
|
|
81
88
|
return pd.DataFrame(data_json)
|
|
82
89
|
except Exception as e:
|
|
83
|
-
logger.error(f"获取第{pn}页股票列表异常: {e}")
|
|
84
90
|
return pd.DataFrame()
|
|
85
91
|
|
|
86
92
|
|
|
87
|
-
def all_stock_ticker_data_new(
|
|
88
|
-
"""
|
|
89
|
-
使用多线程获取所有股票数据
|
|
93
|
+
def all_stock_ticker_data_new(initial_proxies, time_out, max_number) -> pd.DataFrame:
    """Fetch all stock pages with a thread pool, retrying failed pages on new IPs.

    The first round requests every page through ``initial_proxies``. Each
    later round requests only the pages still missing, through a newly
    generated proxy IP. At most ``MAX_IP_LIMIT`` (10) IPs are used, counting
    the initial one; whatever was fetched by then is returned.

    :param initial_proxies: proxy mapping used for the first round
    :param time_out: timeout forwarded to ``get_stock_page_data``
    :param max_number: expected total number of rows, used to derive the page count
    :return: concatenation of all fetched pages, or an empty DataFrame
    """
    total_pages = (max_number + PAGE_SIZE - 1) // PAGE_SIZE  # ceiling division
    all_pages = set(range(1, total_pages + 1))  # every page number to fetch
    success_pages = set()  # page numbers fetched so far
    results = []  # frames of the fetched pages
    used_ip_count = 1  # the initial proxy counts as the first IP
    MAX_IP_LIMIT = 10  # upper bound on the number of IPs used
    is_first_round = True

    # Loop until every page is fetched or the IP budget is exhausted.
    while (all_pages - success_pages) and (used_ip_count < MAX_IP_LIMIT):
        current_failed_pages = all_pages - success_pages
        if used_ip_count > 1:
            logger.info("当前需要处理的失败页码: {}, 已使用IP数量: {}/{}", current_failed_pages, used_ip_count,
                        MAX_IP_LIMIT)

        if is_first_round:
            # Decided by an explicit flag, not by "no page succeeded yet":
            # with the old check a dead initial proxy was reused forever and
            # the IP counter never advanced, so the loop never ended.
            proxies = initial_proxies
            is_first_round = False
        else:
            new_proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
            proxies = {"https": new_proxy_ip}
            used_ip_count += 1

        # Request all still-missing pages in parallel through the current proxy.
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = {
                executor.submit(get_stock_page_data, pn, proxies, PAGE_SIZE, time_out): pn
                for pn in current_failed_pages
            }

            for future, pn in futures.items():
                try:
                    result = future.result()
                except Exception:
                    # Best effort: the page stays pending and is retried on
                    # the next IP.
                    continue
                if not result.empty:
                    results.append(result)
                    success_pages.add(pn)

    # Report pages that are still missing once the IP budget is used up.
    if used_ip_count >= MAX_IP_LIMIT and (all_pages - success_pages):
        remaining_pages = all_pages - success_pages
        logger.warning("已达到最大IP使用限制({}个),剩余未获取页码: {}, 返回现有数据", MAX_IP_LIMIT, remaining_pages)

    # Merge everything that was fetched.
    if results:
        return pd.concat(results, ignore_index=True)
    else:
        return pd.DataFrame()
|
|
113
154
|
|
|
114
155
|
|
|
115
|
-
def get_all_real_time_quotes(proxies):
|
|
116
|
-
fs = "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048"
|
|
117
|
-
# 获取第一页数据
|
|
118
|
-
page_one_df = get_stock_page_data(1, fields, fs, proxies)
|
|
119
|
-
# 数据接口正常返回5600以上的数量
|
|
120
|
-
if page_one_df.shape[0] > min_number:
|
|
121
|
-
page_one_df = rename_real_time_quotes_df(page_one_df)
|
|
122
|
-
return page_one_df
|
|
123
|
-
else:
|
|
124
|
-
page_df = all_stock_ticker_data_new(fields, fs, proxies)
|
|
125
|
-
page_df = rename_real_time_quotes_df(page_df)
|
|
126
|
-
return page_df
|
|
127
|
-
|
|
128
|
-
|
|
129
156
|
# 获取所有股票实时行情数据 f33,委比
|
|
130
157
|
def rename_real_time_quotes_df(temp_df):
|
|
131
158
|
temp_df = temp_df.rename(columns={
|
|
@@ -286,11 +313,28 @@ def rename_real_time_quotes_df(temp_df):
|
|
|
286
313
|
return temp_df
|
|
287
314
|
|
|
288
315
|
|
|
316
|
+
def get_stock_real_time_quotes(time_out):
    """Fetch real-time quotes for all stocks through proxy IPs.

    The total stock count is looked up first (up to three attempts, each with
    a newly generated proxy IP). If no attempt yields a positive count, or the
    page fetch returns nothing, an empty DataFrame is returned.

    :param time_out: timeout forwarded to the count and page requests
    :return: renamed quotes with one row per ``symbol`` (last occurrence kept),
        or an empty DataFrame when nothing could be fetched
    """
    total_number = 0
    initial_proxies = None
    for _ in range(3):
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
        initial_proxies = {"https": proxy_ip, "http": proxy_ip}
        total_number = east_money_stock_common_api.get_stocks_num(1, initial_proxies, 20, time_out)
        if total_number > 0:
            break
    if total_number <= 0:
        return pd.DataFrame()

    # The proxy that answered the count request is reused for the first round
    # of page requests.
    page_df = all_stock_ticker_data_new(initial_proxies, time_out, total_number)
    if page_df.empty:
        # all_stock_ticker_data_new returns a column-less frame when no page
        # was fetched; return it as-is rather than de-duplicating on a
        # 'symbol' column it does not have.
        return page_df

    page_df = rename_real_time_quotes_df(page_df)
    page_df.drop_duplicates('symbol', keep='last', inplace=True)
    return page_df
|
|
334
|
+
|
|
335
|
+
|
|
289
336
|
# 示例调用
|
|
290
337
|
if __name__ == "__main__":
    # Manual smoke run: fetch all quotes and count the rows whose wei_bi is 100
    # (the previous version of this script logged these rows as limit-up data).
    df = get_stock_real_time_quotes(5)
    zt_df = df.loc[df['wei_bi'] == 100]
    # The message had no "{}" placeholder, so the count was never printed.
    logger.info("同步次数:{}", zt_df.shape[0])
|