mns-common 1.4.1.8__py3-none-any.whl → 1.5.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mns_common/api/akshare/__init__.py +0 -1
- mns_common/api/akshare/k_line_api.py +19 -2
- mns_common/api/akshare/stock_bid_ask_api.py +10 -3
- mns_common/api/akshare/stock_zb_pool.py +2 -0
- mns_common/api/akshare/stock_zt_pool_api.py +1 -1
- mns_common/api/em/gd/east_money_stock_gdfx_free_top_10_api.py +62 -7
- mns_common/api/em/real_time/__init__.py +1 -1
- mns_common/api/em/real_time/east_money_debt_api.py +140 -70
- mns_common/api/em/real_time/east_money_etf_api.py +138 -27
- mns_common/api/em/real_time/east_money_stock_a_api.py +24 -28
- mns_common/api/em/real_time/east_money_stock_a_v2_api.py +97 -53
- mns_common/api/em/real_time/east_money_stock_common_api.py +174 -0
- mns_common/api/em/real_time/east_money_stock_hk_api.py +223 -272
- mns_common/api/em/real_time/east_money_stock_hk_gtt_api.py +260 -0
- mns_common/api/em/real_time/east_money_stock_multi_thread_api_v3.py +154 -0
- mns_common/api/em/real_time/east_money_stock_us_api.py +146 -82
- mns_common/api/em/real_time/real_time_quotes_repeat_api.py +195 -0
- mns_common/api/k_line/stock_k_line_data_api.py +11 -1
- mns_common/api/kpl/common/kpl_common_api.py +35 -0
- mns_common/api/proxies/liu_guan_proxy_api.py +55 -5
- mns_common/api/ths/company/company_product_area_industry_index_query.py +46 -0
- mns_common/api/ths/company/ths_company_info_api.py +2 -1
- mns_common/api/ths/company/ths_company_info_web.py +159 -0
- mns_common/api/ths/concept/app/ths_concept_index_app.py +3 -1
- mns_common/api/ths/wen_cai/ths_wen_cai_api.py +1 -1
- mns_common/api/ths/zt/ths_stock_zt_pool_api.py +20 -1
- mns_common/api/ths/zt/ths_stock_zt_pool_v2_api.py +105 -29
- mns_common/api/xueqiu/xue_qiu_k_line_api.py +2 -2
- mns_common/component/common_service_fun_api.py +26 -6
- mns_common/component/data/data_init_api.py +13 -8
- mns_common/component/deal/deal_service_api.py +70 -8
- mns_common/component/deal/deal_service_v2_api.py +167 -0
- mns_common/component/em/em_stock_info_api.py +9 -3
- mns_common/component/main_line/main_line_zt_reason_service.py +237 -0
- mns_common/component/proxies/proxy_common_api.py +141 -45
- mns_common/component/us/us_stock_etf_info_api.py +125 -0
- mns_common/constant/db_name_constant.py +40 -16
- mns_common/constant/extra_income_db_name.py +79 -19
- mns_common/constant/strategy_classify.py +17 -2
- mns_common/db/MongodbUtil.py +3 -0
- mns_common/db/MongodbUtilLocal.py +3 -0
- {mns_common-1.4.1.8.dist-info → mns_common-1.5.7.2.dist-info}/METADATA +1 -1
- {mns_common-1.4.1.8.dist-info → mns_common-1.5.7.2.dist-info}/RECORD +47 -41
- mns_common/api/ths/concept/web/ths_company_info_web.py +0 -163
- mns_common/component/qmt/qmt_buy_service.py +0 -172
- mns_common/component/task/real_time_data_sync_check.py +0 -110
- /mns_common/component/{qmt → main_line}/__init__.py +0 -0
- /mns_common/component/{task → us}/__init__.py +0 -0
- {mns_common-1.4.1.8.dist-info → mns_common-1.5.7.2.dist-info}/WHEEL +0 -0
- {mns_common-1.4.1.8.dist-info → mns_common-1.5.7.2.dist-info}/top_level.txt +0 -0
mns_common/api/em/real_time/east_money_stock_us_api.py

@@ -10,19 +10,23 @@ import pandas as pd
 from loguru import logger
 import requests
 import time
+import mns_common.component.proxies.proxy_common_api as proxy_common_api
 
-# 最大返回条数
-max_number = 12000
-# 最小返回条数
-min_number = 11000
 # 分页条数
 page_number = 100
 
-fields = ("f352,f2,f3,f5,f6,f8,f10,
-          "f33,f34,f35,f62,f66,f69,f72,f100,f184,
+fields = ("f352,f2,f3,f5,f6,f8,f10,f13,f12,f14,f15,f16,f17,f18,f20,f21,f26,"
+          "f33,f34,f35,f62,f66,f69,f72,f100,f184,f103,f383,f4,f9,f19,f265")
 
 
-
+# fields_02 = "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13,f14,f15,f16,f17,f18,f19,f20,f21,f22,f23,f24,f25,f26,f27,f28,f29,f30,f31,f32,f33,f34,f35,f36,f37,f38,f39,f40,f41,f42,f43,f44,f45,f46,f47,f48,f49,f50,f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64,f65,f66,f67,f68,f69,f70,f71,f72,f73,f74,f75,f76,f77,f78,f79,f80,f81,f82,f83,f84,f85,f86,f87,f88,f89,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99,f100,f101,f102,f103,f104,f105,f106,f107,f108" \
+#             ",f109,f110,f111,f112,f113,f114,f115,f116,f117,f118,f119,f120,f121,f122,f123,f124,f125,f126,f127,f128,f129,f130,f131,f132,f133,f134,f135,f136,f137,f138,f139,f140,f141,f142,f143,f144,f145,f146,f147,f148,f149,f150,f151,f152,f153,f154,f155,f156,f157,f158,f159,f160,f161,f162,f163,f164,f165,f166,f167,f168,f169,f170,f171,f172,f173,f174,f175,f176,f177,f178,f179,f180,f181,f182,f183,f184,f185,f186,f187,f188,f189,f190,f191,f192,f193,f194,f195,f196,f197,f198,f199,f200,f201,f202,f203,f204,f205,f206,f207,f208" \
+#             ",f209,f210,f211,f212,f213,f214,f215,f216,f217,f218,f219,f220,f221,f222,f223,f224,f225,f226,f227,f228,f229,f230,f231,f232,f233,f234,f235,f236,f237,f238,f239,f240,f241,f242,f243,f244,f245,f246,f247,f248,f249,f250,f251,f252,f253,f254,f255,f256,f257,f258,f259,f260,f261,f262,f263,f264,f265,f266,f267,f268,f269,f270,f271,f272,f273,f274,f275,f276,f277,f278,f279,f280,f281,f282,f283,f284,f285,f286,f287,f288,f289,f290,f291,f292,f293,f294,f295,f296,f297,f298,f299,f300,f301,f302,f303,f304,f305,f306,f307,f308" \
+#             ",f309,f310,f311,f312,f313,f314,f315,f316,f317,f318,f319,f320,f321,f322,f323,f324,f325,f326,f327,f328,f329,f330,f331,f332,f333,f334,f335,f336,f337,f338,f339,f340,f341,f342,f343,f344,f345,f346,f347,f348,f349,f350,f351,f352,f353,f354,f355,f356,f357,f358,f359,f360,f361,f362,f363,f364,f365,f366,f367,f368,f369,f370,f371,f372,f373,f374,f375,f376,f377,f378,f379,f380,f381,f382,f383,f384,f385,f386,f387,f388,f389,f390,f391,f392,f393,f394,f395,f396,f397,f398,f399,f400,f401,f402,f403,f404,f405,f406,f407,f408" \
+#             ",f401"
+
+
+def get_us_stock_count(pn, proxies, page_size, cookie, time_out):
     try:
         headers = {
             'Cookie': cookie
@@ -33,7 +37,7 @@ def us_real_time_quotes_page_df(cookie, pn, proxies):
         url = "https://72.push2.eastmoney.com/api/qt/clist/get"
         params = {
             "pn": str(pn),
-            "pz":
+            "pz": str(page_size),
             "po": "1",
             "np": "2",
             "ut": "bd1d9ddb04089700cf9c27f6f7426281",
@@ -45,18 +49,45 @@ def us_real_time_quotes_page_df(cookie, pn, proxies):
             "_": str(current_timestamp),
         }
         if proxies is None:
-            r = requests.get(url, params=params, headers=headers)
+            r = requests.get(url, params=params, headers=headers, timeout=time_out)
         else:
-            r = requests.get(url, params=params, headers=headers, proxies=proxies)
+            r = requests.get(url, params=params, headers=headers, proxies=proxies, timeout=time_out)
         data_json = r.json()
-
-
-
-
-
-
-
+        total_number = int(data_json['data']['total'])
+        return total_number
+    except Exception as e:
+        logger.error("获取美股数量:{}", e)
+        return 0
+
+
+# 获取美股分页信息
+def get_us_real_time_quotes_page_df(pn, proxies, page_size, cookie, time_out):
+    try:
+        headers = {
+            'Cookie': cookie
+        }
 
+        current_timestamp = str(int(round(time.time() * 1000, 0)))
+
+        url = "https://72.push2.eastmoney.com/api/qt/clist/get"
+        params = {
+            "pn": str(pn),
+            "pz": str(page_size),
+            "po": "1",
+            "np": "2",
+            "ut": "bd1d9ddb04089700cf9c27f6f7426281",
+            "fltt": "2",
+            "invt": "2",
+            "fid": "f6",
+            "fs": "m:105,m:106,m:107",
+            "fields": fields,
+            "_": str(current_timestamp),
+        }
+        if proxies is None:
+            r = requests.get(url, params=params, headers=headers, timeout=time_out)
+        else:
+            r = requests.get(url, params=params, headers=headers, proxies=proxies, timeout=time_out)
+        data_json = r.json()
         if not data_json["data"]["diff"]:
             return pd.DataFrame()
         temp_df = pd.DataFrame(data_json["data"]["diff"]).T
@@ -67,53 +98,14 @@ def us_real_time_quotes_page_df(cookie, pn, proxies):
         return pd.DataFrame()
 
 
-def thread_pool_executor(cookie, proxies):
-    """
-    使用多线程获取所有美股数据
-    """
-    # 计算总页数,假设总共有1000条数据,每页200条
-
-    per_page = page_number
-    total_pages = (max_number + per_page - 1) // per_page  # 向上取整
-
-    # 创建线程池
-    with ThreadPoolExecutor(max_workers=3) as executor:
-        # 提交任务,获取每页数据
-        futures = [executor.submit(us_real_time_quotes_page_df, cookie, pn, proxies)
-                   for pn in range(1, total_pages + 1)]
-
-        # 收集结果
-        results = []
-        for future in futures:
-            result = future.result()
-            if not result.empty:
-                results.append(result)
-
-    # 合并所有页面的数据
-    if results:
-        return pd.concat(results, ignore_index=True)
-    else:
-        return pd.DataFrame()
-
-
-def get_us_stock_real_time_quotes(cookie, proxies):
-    # 获取第一页数据
-    page_one_df = us_real_time_quotes_page_df(cookie, 1, proxies)
-    # 数据接口正常返回5600以上的数量
-    if page_one_df.shape[0] > min_number:
-        page_one_df = rename_us_stock(page_one_df)
-        page_one_df.drop_duplicates('symbol', keep='last', inplace=True)
-        return page_one_df
-    else:
-        page_df = thread_pool_executor(cookie, proxies)
-        page_df = rename_us_stock(page_df)
-        page_df.drop_duplicates('symbol', keep='last', inplace=True)
-        return page_df
-
-
 def rename_us_stock(temp_df):
     temp_df = temp_df.rename(columns={
 
+        "f4": "change_price",
+        "f9": "pe_ttm",
+
+        # 1 美国本土公司 3 多个市场上市美股 如阿里巴巴 台积电 5 ETF
+        "f19": "voucher_type",
         "f12": "symbol",
         "f14": "name",
         "f3": "chg",
@@ -122,9 +114,7 @@ def rename_us_stock(temp_df):
         "f6": "amount",
         "f8": "exchange",
         "f10": "quantity_ratio",
-        "
-        "f11": "up_speed_05",
-        "f13": "simple_symbol",
+        "f13": "market_code",
         "f15": "high",
         "f16": "low",
         "f17": "open",
@@ -142,16 +132,13 @@ def rename_us_stock(temp_df):
         # "f78": "medium_order_net_inflow",
         # "f84": "small_order_net_inflow",
         "f100": "industry",
-
+        "f265": "industry_code",
+        "f103": "concept_name_str",
+        "f383": "concept_code_str",
         "f184": "today_main_net_inflow_ratio",
         "f352": "average_price",
-        "f211": "buy_1_num",
-        "f212": "sell_1_num"
     })
-
-    temp_df.loc[temp_df['sell_1_num'] == '-', 'sell_1_num'] = 0
-    temp_df.loc[temp_df['up_speed_05'] == '-', 'up_speed_05'] = 0
-    temp_df.loc[temp_df['up_speed'] == '-', 'up_speed'] = 0
+
     temp_df.loc[temp_df['average_price'] == '-', 'average_price'] = 0
     temp_df.loc[temp_df['wei_bi'] == '-', 'wei_bi'] = 0
    temp_df.loc[temp_df['yesterday_price'] == '-', 'yesterday_price'] = 0
@@ -219,16 +206,93 @@ def rename_us_stock(temp_df):
     return temp_df
 
 
+def all_us_stock_ticker_data_new(initial_proxies, time_out, em_cookie, max_number) -> pd.DataFrame:
+    """
+    使用多线程获取所有股票数据,失败页面会使用新IP重试,最多使用10个IP
+    """
+    per_page = page_number
+    total_pages = (max_number + per_page - 1) // per_page  # 向上取整
+    all_pages = set(range(1, total_pages + 1))  # 所有需要获取的页码
+    success_pages = set()  # 成功获取的页码
+    results = []  # 存储成功获取的数据
+    used_ip_count = 1  # 已使用IP计数器(初始IP算第一个)
+    MAX_IP_LIMIT = 10  # IP使用上限
+
+    # 循环处理直到所有页面成功或达到IP上限
+    while (all_pages - success_pages) and (used_ip_count < MAX_IP_LIMIT):
+        # 获取当前需要处理的失败页码
+        current_failed_pages = all_pages - success_pages
+        if used_ip_count > 1:
+            logger.info("当前需要处理的失败页码: {}, 已使用IP数量: {}/{}", current_failed_pages, used_ip_count,
+                        MAX_IP_LIMIT)
+
+        # 首次使用初始代理,后续获取新代理
+        if len(success_pages) == 0:
+            proxies = initial_proxies
+        else:
+            # 每次重试前获取新代理并计数
+            # logger.info("获取新代理IP处理失败页面")
+            new_proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
+            proxies = {"https": new_proxy_ip}
+            # logger.info("新代理IP: {}, 已使用IP数量: {}/{}", new_proxy_ip, used_ip_count + 1, MAX_IP_LIMIT)
+            used_ip_count += 1  # 增加IP计数器
+
+        # 创建线程池处理当前失败的页码
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            futures = {
+                executor.submit(get_us_real_time_quotes_page_df, pn, proxies,
+                                per_page, em_cookie, time_out): pn
+                for pn in current_failed_pages
+            }
+
+            # 收集结果并记录成功页码
+            for future, pn in futures.items():
+                try:
+                    result = future.result()
+                    if not result.empty:
+                        results.append(result)
+                        success_pages.add(pn)
+                    # else:
+                    #     logger.warning("页码 {} 未返回有效数据", pn)
+                except Exception as e:
+                    continue
+                    # logger.error("页码 {} 处理异常: {}", pn, str(e))
+
+    # 检查是否达到IP上限
+    if used_ip_count >= MAX_IP_LIMIT and (all_pages - success_pages):
+        remaining_pages = all_pages - success_pages
+        logger.warning("已达到最大IP使用限制({}个),剩余未获取页码: {}, 返回现有数据", MAX_IP_LIMIT, remaining_pages)
+
+    # 合并所有成功获取的数据
+    if results:
+        return pd.concat(results, ignore_index=True)
+    else:
+        return pd.DataFrame()
+
+
+def get_us_real_time_quotes(time_out, em_cookie):
+    try_numer = 3
+    while try_numer > 0:
+        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
+        initial_proxies = {"https": proxy_ip,
+                           "http": proxy_ip}
+
+        max_number = get_us_stock_count(1, initial_proxies, 20, em_cookie, time_out)
+        if max_number > 0:
+            break
+        try_numer = try_numer - 1
+    if max_number == 0:
+        return pd.DataFrame()
+    all_hk_stock_ticker_data_new_df = all_us_stock_ticker_data_new(initial_proxies, time_out, em_cookie, max_number)
+    return rename_us_stock(all_hk_stock_ticker_data_new_df)
+
+
 if __name__ == '__main__':
     cookie_test = 'qgqp_b_id=1e0d79428176ed54bef8434efdc0e8c3; mtp=1; ct=QVRY_s8Tiag1WfK2tSW2n03qpsX-PD8aH_rIjKVooawX8K33UVnpIofK088lD1lguWlE_OEIpQwn3PJWFPhHvSvyvYr4Zka3l4vxtZfH1Uikjtyy9z1H4Swo0rQzMKXncVzBXiOo5TjE-Dy9fcoG3ZF7UVdQ35jp_cFwzOlpK5Y; ut=FobyicMgeV51lVMr4ZJXvn-72bp0oeSOvtzifFY_U7kBFtR6og4Usd-VtBM5XBBvHq0lvd9xXkvpIqWro9EDKmv6cbKOQGyawUSMcKVP57isZCaM7lWQ6jWXajvTfvV4mIR-W_MZNK8VY0lL9W4qNMniJ6PBn_gkJsSAJCadmsyI9cxmjx--gR4m54pdF_nie_y4iWHys83cmWR2R7Bt1KKqB25OmkfCQTJJqIf7QsqangVGMUHwMC39Z9QhrfCFHKVNrlqS503O6b9GitQnXtvUdJhCmomu; pi=4253366368931142%3Bp4253366368931142%3B%E8%82%A1%E5%8F%8B9x56I87727%3BYNigLZRW%2FzMdGgVDOJbwReDWnTPHl51dB0gQLiwaCf1XY98mlJYx6eJbsoYr5Nie%2BX1L%2BzaMsec99KkX%2BT29Ds1arfST7sIBXxjUQ3dp11IPUnXy64PaBFRTHzMRWnCFJvvhc%2FAI41rXSGXolC8YMxI%2BvyPS%2BuErwgOVjC5vvsIiKeO7TLyKkhqqQJPX%2F7RWC5Sf3QLh%3Bdwjn4Xho10%2FKjqOgTWs%2FJF4%2FkdKzeuBwM8sz9aLvJovejAkCAyGMyGYA6AE67Xk2Ki7x8zdfBifF2DG%2Fvf2%2BXAYN8ZVISSEWTIXh32Z5MxEacK4JBTkqyiD93e1vFBOFQ82BqaiVmntUq0V6FrTUHGeh1gG5Sg%3D%3D; uidal=4253366368931142%e8%82%a1%e5%8f%8b9x56I87727; sid=170711377; vtpst=|; quote_lt=1; websitepoptg_api_time=1715777390466; emshistory=%5B%22%E8%BD%AC%E5%80%BA%E6%A0%87%22%2C%22%E8%BD%AC%E5%80%BA%E6%A0%87%E7%9A%84%22%5D; st_si=00364513876913; st_asi=delete; HAList=ty-116-00700-%u817E%u8BAF%u63A7%u80A1%2Cty-1-688695-%u4E2D%u521B%u80A1%u4EFD%2Cty-1-600849-%u4E0A%u836F%u8F6C%u6362%2Cty-1-603361-%u6D59%u6C5F%u56FD%u7965%2Cty-1-603555-ST%u8D35%u4EBA%2Cty-0-000627-%u5929%u8302%u96C6%u56E2%2Cty-0-002470-%u91D1%u6B63%u5927%2Cty-0-832876-%u6167%u4E3A%u667A%u80FD%2Cty-0-300059-%u4E1C%u65B9%u8D22%u5BCC%2Cty-107-CWB-%u53EF%u8F6C%u503AETF-SPDR; st_pvi=26930719093675; st_sp=2024-04-28%2017%3A27%3A05; st_inirUrl=https%3A%2F%2Fcn.bing.com%2F; st_sn=23; st_psi=20240517111108288-113200301321-2767127768'
-
-
-
-
-
-
-
-        "chg",
-        "amount"
-    ]]
-    logger.info('test')
+
+    page_test_df = get_us_real_time_quotes_page_df(1, None, 100, cookie_test, 30)
+    page_test_df = rename_us_stock(page_test_df)
+
+    us_test_df = get_us_real_time_quotes(30, cookie_test)
+    # us_test_df = get_us_real_time_quotes_page_df(1, None, 200, cookie_test, 30)
+    print(us_test_df)
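
The new `all_us_stock_ticker_data_new` helper (and the matching logic in the new `real_time_quotes_repeat_api.py` module below) replaces the old single-proxy `thread_pool_executor` with a retry loop: track which pages are still missing, fetch only those in a thread pool, and rotate to a fresh proxy IP before each retry round, giving up after ten IPs. A minimal sketch of that pattern follows; `fetch_page` and `new_proxy` are hypothetical stand-ins for the real page request and proxy provider, not functions from this package.

from concurrent.futures import ThreadPoolExecutor

import pandas as pd


def fetch_all_pages(fetch_page, total_pages, initial_proxies, new_proxy, max_proxies=10):
    """Fetch pages 1..total_pages, retrying failed pages with a fresh proxy each round (sketch)."""
    pending = set(range(1, total_pages + 1))  # pages not yet fetched successfully
    frames = []
    proxies_used = 1                          # the initial proxy counts as the first IP
    first_round = True
    while pending and proxies_used < max_proxies:
        if first_round:
            proxies = initial_proxies
            first_round = False
        else:
            proxies = new_proxy()             # rotate to a fresh IP before retrying
            proxies_used += 1
        with ThreadPoolExecutor(max_workers=10) as pool:
            futures = {pool.submit(fetch_page, pn, proxies): pn for pn in pending}
            for future, pn in futures.items():
                try:
                    page_df = future.result()
                except Exception:
                    continue                  # leave pn pending for the next round
                if not page_df.empty:
                    frames.append(page_df)
                    pending.discard(pn)
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In the diff itself, `get_us_real_time_quotes` wires this pattern to `get_us_real_time_quotes_page_df` and `proxy_common_api.generate_proxy_ip_api`, after first calling `get_us_stock_count` (up to three times) to learn how many pages exist.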
mns_common/api/em/real_time/real_time_quotes_repeat_api.py (new file)

@@ -0,0 +1,195 @@
+import requests
+import mns_common.utils.data_frame_util as data_frame_util
+import json
+import datetime
+import mns_common.component.proxies.proxy_common_api as proxy_common_api
+from loguru import logger
+import mns_common.api.em.real_time.east_money_stock_common_api as east_money_stock_common_api
+import pandas as pd
+import time
+from concurrent.futures import ThreadPoolExecutor
+
+fields = ("f352,f2,f3,f5,f6,f8,f10,f11,f22,f12,f14,f15,f16,f17,"
+          "f18,f20,f21,f26,f33,f34,f35,f62,f66,f69,f72,f100,f184,f211,f212"),
+fs = "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048"
+
+# 分页条数
+PAGE_SIZE = 100
+
+
+def get_stock_page_data_time_out(pn, proxies, page_size, time_out):
+    """
+    获取单页股票数据
+    """
+    # 获取当前日期和时间
+    current_time = datetime.datetime.now()
+
+    # 将当前时间转换为时间戳(以毫秒为单位)
+    current_timestamp_ms = int(current_time.timestamp() * 1000)
+
+    url = "https://33.push2.eastmoney.com/api/qt/clist/get"
+    params = {
+        "cb": "jQuery1124046660442520420653_" + str(current_timestamp_ms),
+        "pn": str(pn),
+        "pz": str(page_size),  # 每页最大200条
+        "po": "0",
+        "np": "3",
+        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
+        "fltt": "2",
+        "invt": "2",
+        "wbp2u": "|0|0|0|web",
+        "fid": "f12",
+        "fs": fs,
+        "fields": fields,
+        "_": current_timestamp_ms
+    }
+    try:
+        if proxies is None:
+            r = requests.get(url, params, timeout=time_out)
+        else:
+            r = requests.get(url, params, proxies=proxies, timeout=time_out)
+
+        data_text = r.text
+        if pn == 1:
+            try:
+                begin_index_total = data_text.index('"total":')
+
+                end_index_total = data_text.index('"diff"')
+                global max_number
+                max_number = int(data_text[begin_index_total + 8:end_index_total - 1])
+            except Exception as e:
+                logger.error("获取第{}页股票列表异常:{}", pn, str(e))
+                return pd.DataFrame()
+
+        begin_index = data_text.index('[')
+        end_index = data_text.index(']')
+        data_json = data_text[begin_index:end_index + 1]
+        data_json = json.loads(data_json)
+        if data_json is None:
+            return pd.DataFrame()
+        else:
+            result_df = pd.DataFrame(data_json)
+            result_df['page_number'] = pn
+            return result_df
+    except Exception as e:
+        return pd.DataFrame()
+
+
+
+
+def repeated_acquisition_ask_sync(time_out):
+    per_page = PAGE_SIZE
+    total_pages = (max_number + per_page - 1) // per_page  # 向上取整
+    result_df = pd.DataFrame()
+    now_page = 1
+    proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
+    while now_page <= total_pages:
+        proxies = {"https": proxy_ip,
+                   "http": proxy_ip}
+        try:
+            page_df = get_stock_page_data_time_out(now_page, proxies, PAGE_SIZE, time_out)
+            if data_frame_util.is_not_empty(page_df):
+                result_df = pd.concat([page_df, result_df])
+                logger.info("获取页面数据成功:{}", now_page)
+                now_page = now_page + 1
+            else:
+                time.sleep(0.2)
+                proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
+                logger.info("获取页面数据失败:{}", now_page)
+        except BaseException as e:
+            time.sleep(1)
+            proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
+    # 示例调用
+    return result_df
+
+
+def repeated_acquisition_ask_async(initial_proxies, time_out, max_number):
+    """
+    使用多线程获取所有股票数据,失败页面会使用新IP重试,最多使用10个IP
+    """
+    per_page = PAGE_SIZE
+    total_pages = (max_number + per_page - 1) // per_page  # 向上取整
+    all_pages = set(range(1, total_pages + 1))  # 所有需要获取的页码
+    success_pages = set()  # 成功获取的页码
+    results = []  # 存储成功获取的数据
+    used_ip_count = 1  # 已使用IP计数器(初始IP算第一个)
+    MAX_IP_LIMIT = 10  # IP使用上限
+
+    # 循环处理直到所有页面成功或达到IP上限
+    while (all_pages - success_pages) and (used_ip_count < MAX_IP_LIMIT):
+        # 获取当前需要处理的失败页码
+        current_failed_pages = all_pages - success_pages
+        if used_ip_count > 1:
+            logger.info("当前需要处理的失败页码: {}, 已使用IP数量: {}/{}", current_failed_pages, used_ip_count,
+                        MAX_IP_LIMIT)
+
+        # 首次使用初始代理,后续获取新代理
+        if len(success_pages) == 0:
+            proxies = initial_proxies
+        else:
+            # 每次重试前获取新代理并计数
+            # logger.info("获取新代理IP处理失败页面")
+            new_proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
+            proxies = {"https": new_proxy_ip}
+            # logger.info("新代理IP: {}, 已使用IP数量: {}/{}", new_proxy_ip, used_ip_count + 1, MAX_IP_LIMIT)
+            used_ip_count += 1  # 增加IP计数器
+
+        # 创建线程池处理当前失败的页码
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            futures = {
+                executor.submit(get_stock_page_data_time_out, pn, proxies, PAGE_SIZE, time_out): pn
+                for pn in current_failed_pages
+            }
+
+            # 收集结果并记录成功页码
+            for future, pn in futures.items():
+                try:
+                    result = future.result()
+                    if not result.empty:
+                        results.append(result)
+                        success_pages.add(pn)
+                    # else:
+                    #     logger.warning("页码 {} 未返回有效数据", pn)
+                except Exception as e:
+                    continue
+                    # logger.error("页码 {} 处理异常: {}", pn, str(e))
+
+    # 检查是否达到IP上限
+    if used_ip_count >= MAX_IP_LIMIT and (all_pages - success_pages):
+        remaining_pages = all_pages - success_pages
+        logger.warning("已达到最大IP使用限制({}个),剩余未获取页码: {}, 返回现有数据", MAX_IP_LIMIT, remaining_pages)
+
+    # 合并所有成功获取的数据
+    if results:
+        return pd.concat(results, ignore_index=True)
+    else:
+        return pd.DataFrame()
+
+
+def get_stock_real_time_quotes(time_out):
+    try_numer = 3
+    while try_numer > 0:
+        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
+        initial_proxies = {"https": proxy_ip,
+                           "http": proxy_ip}
+
+        total_number = east_money_stock_common_api.get_stocks_num(1, initial_proxies, 20, time_out)
+        if total_number > 0:
+            break
+        try_numer = try_numer - 1
+    if total_number == 0:
+        return pd.DataFrame()
+
+    result_df = repeated_acquisition_ask_async(initial_proxies, time_out, total_number)
+    return east_money_stock_common_api.rename_real_time_quotes_df(result_df)
+
+
+if __name__ == '__main__':
+
+    while True:
+        # proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
+        # proxies = {"https": proxy_ip,
+        #            "http": proxy_ip}
+        time_out_test = 10  # Set the timeout value
+        result = get_stock_real_time_quotes(time_out_test)
+        print(result)
mns_common/api/k_line/stock_k_line_data_api.py

@@ -16,6 +16,7 @@ def stock_k_line_hist(
         end_date: str = "20500101",
         adjust: str = "",
         timeout: float = None,
+        proxies: str = None
 ) -> pd.DataFrame:
     """
     东方财富网-行情首页-沪深京 A 股-每日行情
@@ -32,6 +33,9 @@ def stock_k_line_hist(
     :type adjust: str
     :param timeout: choice of None or a positive float number
     :type timeout: float
+    :param proxies: 代理ip
+    :type proxies: str
+
     :return: 每日行情
     :rtype: pandas.DataFrame
     """
@@ -49,7 +53,12 @@ def stock_k_line_hist(
         "end": end_date,
         "_": "1623766962675",
     }
-
+
+    if proxies is None:
+        r = requests.get(url, params=params, timeout=timeout)
+    else:
+        r = requests.get(url, params=params, proxies=proxies, timeout=timeout)
+
     data_json = r.json()
     if not (data_json["data"] and data_json["data"]["klines"]):
         return pd.DataFrame()
@@ -103,4 +112,5 @@ if __name__ == '__main__':
        "19700101",
        "20500101",
        "",
+       None,
        None)
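
The `proxies` value added to `stock_k_line_hist` is forwarded directly to `requests.get`; despite the `str` annotation, `requests` expects a mapping of scheme to proxy URL. A hedged usage sketch follows; the import path and the leading symbol argument are assumptions for illustration, since only the trailing arguments appear in the diff's `__main__` example.

# Hypothetical usage sketch: import path and leading argument are assumed, not taken from the diff.
import mns_common.api.k_line.stock_k_line_data_api as stock_k_line_data_api

proxy = {"http": "http://1.2.3.4:8888", "https": "http://1.2.3.4:8888"}  # placeholder proxy address
k_line_df = stock_k_line_data_api.stock_k_line_hist(
    "000001",                 # assumed leading symbol argument
    end_date="20500101",
    adjust="",
    timeout=10,
    proxies=proxy,
)
print(k_line_df.head())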
mns_common/api/kpl/common/kpl_common_api.py

@@ -9,6 +9,7 @@ sys.path.append(project_path)
 import pandas as pd
 import mns_common.utils.data_frame_util as data_frame_util
 import mns_common.api.kpl.common.kpl_common_field_constant as kpl_common_field_constant
+from loguru import logger
 
 BEST_CHOOSE = '7'
 AREA = '6'
@@ -117,7 +118,41 @@ def get_plate_index_his(index, index_type, str_day, begin, end):
     return data_df_his
 
 
+# 获取开盘啦 股票入选概念原因
+def get_kpl_concept_choose_reason(plate_id, symbol_list_str):
+    try:
+        response = requests.get(
+            url="https://apphwshhq.longhuvip.com/w1/api/index.php",
+            params={
+                "PhoneOSNew": "2",
+                "PlateID": plate_id,
+                "Stocks": symbol_list_str,
+                "VerSion": "5.21.0.3",
+                "a": "PlateIntroduction_Info",
+                "apiv": "w42",
+                "c": "ZhiShuRanking"
+            },
+            headers={
+                "User-Agent": "lhb/5.21.3 (com.kaipanla.www; build:0; iOS 18.6.2) Alamofire/4.9.1"
+            }
+        )
+
+        json_result = response.json()
+        symbol_list_reason = json_result.get('List')
+
+        df_simple = pd.DataFrame(symbol_list_reason, columns=['symbol', 'choose_reason', 'flag', 'remark'])
+        return df_simple
+    except BaseException as e:
+        logger.error("获取开盘啦股票入选原因异常:{},{},{}", plate_id, symbol_list_str, e)
+        return pd.DataFrame()
+
+
 if __name__ == '__main__':
+    plate_id_test = '801133'
+    symbol_list_str_test = "002017,002104,301678,003040,300689,300608,300698,600941,002313,002881,002467,300579,002355,300205,300638,002049"
+
+    df_simple_test = get_kpl_concept_choose_reason(plate_id_test, symbol_list_str_test)
+
     df = plate_detail_info('801359')
 
     df = get_plate_index(7)
mns_common/api/proxies/liu_guan_proxy_api.py

@@ -10,6 +10,13 @@ import requests
 import time
 import hashlib
 import json
+from mns_common.db.MongodbUtil import MongodbUtil
+from functools import lru_cache
+import mns_common.constant.db_name_constant as db_name_constant
+
+mongodb_util = MongodbUtil('27017')
+
+import random
 
 # 提取订单
 """
@@ -26,8 +33,49 @@ import json
 """
 
 
+@lru_cache(maxsize=None)
+def query_province_and_city_info():
+    return mongodb_util.find_all_data(db_name_constant.IP_PROXY_CITY_PROVINCE)
+
+
 def get_proxy_api(order_id, secret, unbind_time):
+    province_and_city_info_df = query_province_and_city_info()
+    random_row = province_and_city_info_df.sample(n=1)
+    cid = str(list(random_row['cid'])[0])
+    pid = str(list(random_row['pid'])[0])
+
     num = "1"
+    noDuplicate = "1"
+    lineSeparator = "0"
+    singleIp = "0"
+    time_str = str(int(time.time()))  # 时间戳
+
+    # 计算sign
+    txt = "orderId=" + order_id + "&" + "secret=" + secret + "&" + "time=" + time_str
+    sign = hashlib.md5(txt.encode()).hexdigest()
+    # 访问URL获取IP
+    url = (
+            "http://api.hailiangip.com:8422/api/getIp?type=1" + "&num=" + num + "&pid=" + pid
+            + "&unbindTime=" + unbind_time + "&cid=" + cid
+            + "&orderId=" + order_id + "&time=" + time_str + "&sign=" + sign + "&dataType=0"
+            + "&lineSeparator=" + lineSeparator + "&noDuplicate=" + noDuplicate + "&singleIp=" + singleIp)
+    my_response = requests.get(url).content
+    js_res = json.loads(my_response)
+    for dic in js_res["data"]:
+        try:
+            ip = dic["ip"]
+            # ip = dic["realIp"]
+            port = dic["port"]
+            ip_port = ip + ":" + str(port)
+            return ip_port
+        except BaseException as e:
+            logger.error("获取ip地址异常:{}", e)
+    return None
+
+
+# 线程池
+def get_proxy_pool_api(order_id, secret, unbind_time, ip_num):
+    num = str(ip_num)
     pid = "-1"
     cid = ""
     noDuplicate = "1"
@@ -46,20 +94,22 @@ def get_proxy_api(order_id, secret, unbind_time):
             + "&lineSeparator=" + lineSeparator + "&noDuplicate=" + noDuplicate + "&singleIp=" + singleIp)
     my_response = requests.get(url).content
     js_res = json.loads(my_response)
+    ip_pool_list = []
     for dic in js_res["data"]:
         try:
             ip = dic["ip"]
             port = dic["port"]
             ip_port = ip + ":" + str(port)
-
+            ip_pool_list.append(ip_port)
         except BaseException as e:
             logger.error("获取ip地址异常:{}", e)
             return None
+    return ip_pool_list
 
 
 if __name__ == '__main__':
-
-
-
-    ip = get_proxy_api(
+    order_id_test = ''
+    secret_test = ''
+    unbind_time_test = str(60 * 10)
+    ip = get_proxy_api(order_id_test, secret_test, unbind_time_test)
     print(ip)
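
After this change `get_proxy_pool_api` accumulates every address returned by the provider and hands back a list of `ip:port` strings (it can still return `None` if the provider call fails), while `get_proxy_api` keeps returning a single address. A hedged sketch of turning that pool into the `requests`-style proxy mappings used elsewhere in this diff; the order id and secret are placeholders for real credentials.

# Illustrative only: order_id and secret are placeholder credentials.
ip_pool = get_proxy_pool_api(order_id="", secret="", unbind_time=str(60 * 10), ip_num=5)
proxy_mappings = [
    {"https": ip_port, "http": ip_port}  # same shape as the proxies dicts built elsewhere in this diff
    for ip_port in (ip_pool or [])       # ip_pool may be None when the provider request fails
]
print(proxy_mappings)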