mns-common 1.5.1.7__py3-none-any.whl → 1.5.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mns-common might be problematic. Click here for more details.

@@ -1,363 +0,0 @@
1
- import requests
2
-
3
- import mns_common.utils.data_frame_util as data_frame_util
4
- import json
5
- import datetime
6
-
7
- import threading
8
- from concurrent.futures import ThreadPoolExecutor
9
- import mns_common.component.proxies.proxy_common_api as proxy_common_api
10
- from loguru import logger
11
- import concurrent.futures
12
- import pandas as pd
13
- import time
14
- from concurrent.futures import ThreadPoolExecutor, as_completed
15
- from threading import Lock
16
-
17
# Comma-separated field codes sent as the ``fields`` query parameter.
# This must be a plain string: a trailing comma after the closing parenthesis
# would silently turn the value into a one-element tuple.
fields = ("f352,f2,f3,f5,f6,f8,f10,f11,f22,f12,f14,f15,f16,f17,"
          "f18,f20,f21,f26,f33,f34,f35,f62,f66,f69,f72,f100,f184,f211,f212")
# Value sent as the ``fs`` query parameter
# (presumably the market/board selector -- confirm against the upstream API).
fs = "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048"

# Maximum number of rows expected back. Overwritten at runtime by
# get_stock_page_data_time_out with the "total" reported for page 1.
max_number = 5800
# Minimum number of rows expected back.
min_number = 5600
# Number of rows requested per page.
PAGE_SIZE = 100
27
-
28
-
29
def get_stock_page_data_time_out(pn, proxies, page_size, time_out):
    """
    Fetch a single page of the stock list and return it as a DataFrame.

    :param pn: page number to request (sent as ``pn``); page 1 additionally
        refreshes the module-level ``max_number``.
    :param proxies: ``requests``-style proxies mapping, or ``None`` to connect
        without a proxy.
    :param page_size: number of rows requested per page (sent as ``pz``).
    :param time_out: timeout handed to ``requests.get``.
    :return: DataFrame built from the first JSON array found in the response,
        with an extra ``page_number`` column; an empty DataFrame when the
        request or the parsing fails (the error is logged, never raised).
    """
    global max_number

    # Millisecond timestamp of "now"; used in the callback name and as the
    # ``_`` query parameter.
    current_timestamp_ms = int(datetime.datetime.now().timestamp() * 1000)

    url = "https://33.push2.eastmoney.com/api/qt/clist/get"
    params = {
        "cb": "jQuery1124046660442520420653_" + str(current_timestamp_ms),
        "pn": str(pn),
        "pz": str(page_size),  # at most 200 rows per page
        "po": "0",
        "np": "3",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "wbp2u": "|0|0|0|web",
        "fid": "f12",
        "fs": fs,
        "fields": fields,
        "_": current_timestamp_ms
    }
    try:
        # ``proxies=None`` is the default of requests.get, so one call covers
        # both the proxied and the direct case.
        r = requests.get(url, params, proxies=proxies, timeout=time_out)

        data_text = r.text
        if pn == 1:
            try:
                # The total row count sits between '"total":' and the
                # character right before '"diff"'.
                begin_index_total = data_text.index('"total":')
                end_index_total = data_text.index('"diff"')
                max_number = int(data_text[begin_index_total + 8:end_index_total - 1])
            except Exception as e:
                logger.error("获取第{}页股票列表异常:{}", pn, str(e))
                return pd.DataFrame()

        # The rows are the first '[' ... first ']' span of the response text.
        begin_index = data_text.index('[')
        end_index = data_text.index(']')
        data_json = json.loads(data_text[begin_index:end_index + 1])

        result_df = pd.DataFrame(data_json)
        result_df['page_number'] = pn
        return result_df
    except Exception as e:
        logger.error("获取第{}页股票列表异常:{}", pn, str(e))
        return pd.DataFrame()
86
-
87
-
88
def repeated_acquisition_ask(per_page, max_number, time_out, max_workers=5):
    """
    Fetch every page of the stock list with a pool of worker threads.

    Pages ``1..ceil(max_number / per_page)`` are split into contiguous groups,
    one per worker. A worker keeps using the same proxy IP while pages succeed;
    when a page comes back empty or raises, it requests a new proxy IP and
    retries the same page. There is no retry limit.

    :param per_page: number of rows requested per page.
    :param max_number: expected total number of rows; only used to derive the
        page count.
    :param time_out: timeout forwarded to ``get_stock_page_data_time_out``.
    :param max_workers: number of worker threads / page groups.
    :return: all fetched pages concatenated with a fresh index, in worker
        completion order; an empty DataFrame when there is nothing to fetch.
    """
    total_pages = (max_number + per_page - 1) // per_page  # ceiling division
    if total_pages <= 0:
        # Nothing to fetch: do not start workers that would each request a
        # proxy IP for zero pages.
        return pd.DataFrame()

    def new_proxies():
        """Request a new proxy IP and wrap it as a requests proxies mapping."""
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
        return {"https": proxy_ip, "http": proxy_ip}

    def fetch_pages(page_nums):
        """Fetch one group of pages, reusing the proxy IP until it fails."""
        proxies = new_proxies()
        thread_results = []  # per-thread list of page DataFrames

        for page_num in page_nums:
            while True:  # retry loop for the current page
                try:
                    page_df = get_stock_page_data_time_out(
                        page_num, proxies, per_page, time_out
                    )
                    if data_frame_util.is_not_empty(page_df):
                        logger.info("线程{} 页面{}获取成功(IP复用中)",
                                    threading.get_ident(), page_num)
                        thread_results.append(page_df)
                        break  # keep the current IP for the next page
                    logger.warning("页面数据为空:{},重试中...", page_num)
                    # Empty page: switch to a new IP before retrying.
                    proxies = new_proxies()
                    time.sleep(0.2)
                except Exception as e:
                    # Exception (not BaseException) so SystemExit and
                    # KeyboardInterrupt are not trapped in this endless loop.
                    logger.error("线程{} 页面{}获取异常[{}],更换IP重试",
                                 threading.get_ident(), page_num, str(e))
                    proxies = new_proxies()
                    time.sleep(1)
        return thread_results

    def split_pages(total, workers):
        """Split pages 1..total into ``workers`` contiguous, near-equal groups."""
        pages = list(range(1, total + 1))
        avg, remainder = divmod(total, workers)
        split = []
        start = 0
        for i in range(workers):
            # The first ``remainder`` groups take one extra page.
            end = start + avg + (1 if i < remainder else 0)
            split.append(pages[start:end])
            start = end
        return split

    # With more workers than pages some groups are empty; skip them so no
    # idle worker requests a proxy IP.
    page_groups = [group for group in split_pages(total_pages, max_workers) if group]

    collected = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(fetch_pages, group) for group in page_groups]

        # Results are gathered on the calling thread only, so no lock is needed.
        for future in as_completed(futures):
            try:
                collected.extend(future.result())
            except Exception as e:
                logger.error("线程结果处理失败:{}", str(e))

    if not collected:
        return pd.DataFrame()
    # One concat at the end instead of re-copying the result per future.
    return pd.concat(collected, ignore_index=True)
156
-
157
-
158
def repeated_acquisition_ask_sync(time_out):
    """
    Fetch every page of the stock list sequentially on the calling thread.

    The page count is derived once, on entry, from the module-level
    ``max_number`` and ``PAGE_SIZE``. A page that comes back empty or raises is
    retried with a newly requested proxy IP; there is no retry limit.

    :param time_out: timeout forwarded to ``get_stock_page_data_time_out``.
    :return: the fetched pages concatenated, each new page placed in front of
        the pages fetched before it (original row indexes are kept).
    """
    per_page = PAGE_SIZE
    total_pages = (max_number + per_page - 1) // per_page  # ceiling division
    result_df = pd.DataFrame()
    now_page = 1
    proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
    while now_page <= total_pages:
        proxies = {"https": proxy_ip,
                   "http": proxy_ip}
        try:
            page_df = get_stock_page_data_time_out(now_page, proxies, PAGE_SIZE, time_out)
            if data_frame_util.is_not_empty(page_df):
                result_df = pd.concat([page_df, result_df])
                logger.info("获取页面数据成功:{}", now_page)
                now_page = now_page + 1
            else:
                # Empty page: wait briefly, then retry the same page with a new IP.
                time.sleep(0.2)
                proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
                logger.info("获取页面数据失败:{}", now_page)
        except Exception as e:
            # Exception (not BaseException) so SystemExit/KeyboardInterrupt can
            # end this otherwise endless retry loop; the failure is logged
            # instead of being dropped silently.
            logger.error("获取第{}页数据异常:{}", now_page, str(e))
            time.sleep(1)
            proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
    return result_df
182
-
183
-
184
def repeated_acquisition_ask_async(time_out, max_number, num_threads, pages_per_thread):
    """
    Fetch the stock list with a thread pool and return the cleaned quotes.

    Thread ``i`` (1-based) handles pages ``(i - 1) * pages_per_thread + 1`` to
    ``i * pages_per_thread``, capped at the total page count. A page that comes
    back empty or raises is retried with a newly requested proxy IP; there is
    no retry limit.

    :param time_out: timeout forwarded to ``get_stock_page_data_time_out``.
    :param max_number: expected total number of rows; with ``PAGE_SIZE`` it
        determines the page count.
    :param num_threads: maximum number of worker threads / page ranges.
    :param pages_per_thread: number of consecutive pages per worker.
    :return: result of ``rename_real_time_quotes_df`` applied to the
        concatenated pages.
    """
    per_page = PAGE_SIZE
    total_pages = (max_number + per_page - 1) // per_page  # ceiling division
    result_df = pd.DataFrame()

    # Guards the shared ``result_df``, which every worker appends to.
    df_lock = Lock()

    if num_threads * pages_per_thread < total_pages:
        # The ranges below stop at num_threads * pages_per_thread, so later
        # pages are never requested; say so instead of truncating silently.
        logger.warning("线程数{}×每线程页数{}不足以覆盖全部{}页,超出部分不会被抓取",
                       num_threads, pages_per_thread, total_pages)

    def process_page_range(start_page, end_page, thread_id):
        """Fetch pages start_page..end_page and append them to ``result_df``."""
        nonlocal result_df
        local_df = pd.DataFrame()
        current_page = start_page
        last_page = min(end_page, total_pages)
        proxy_ip = proxy_common_api.generate_proxy_ip_api(1)

        while current_page <= last_page:
            proxies = {"https": proxy_ip, "http": proxy_ip}
            try:
                page_df = get_stock_page_data_time_out(current_page, proxies, PAGE_SIZE, time_out)
                if data_frame_util.is_not_empty(page_df):
                    local_df = pd.concat([local_df, page_df])
                    logger.info("线程{}获取页面数据成功: {}", thread_id, current_page)
                    current_page += 1
                else:
                    # Empty page: wait briefly, then retry it with a new IP.
                    time.sleep(0.2)
                    proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
                    logger.info("线程{}获取页面数据失败: {}", thread_id, current_page)
            except Exception as e:
                # Exception (not BaseException) so SystemExit and
                # KeyboardInterrupt are not trapped in this endless retry loop.
                time.sleep(1)
                proxy_ip = proxy_common_api.generate_proxy_ip_api(1)
                logger.error("线程{}处理页面{}时发生错误: {}", thread_id, current_page, e)

        with df_lock:
            result_df = pd.concat([result_df, local_df])
        return len(local_df)

    # Build (start_page, end_page, thread_id) for every worker that has at
    # least one page to fetch.
    page_ranges = []
    for i in range(num_threads):
        start_page = i * pages_per_thread + 1
        end_page = (i + 1) * pages_per_thread
        if start_page > total_pages:
            break
        page_ranges.append((start_page, end_page, i + 1))

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        futures = [
            executor.submit(process_page_range, start, end, tid)
            for start, end, tid in page_ranges
        ]

        # Wait for every worker; result() re-raises anything it did not handle.
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                logger.error("线程执行出错: {}", e)

    return rename_real_time_quotes_df(result_df)
247
-
248
-
249
def rename_real_time_quotes_df(temp_df):
    """
    Rename the raw ``f*`` quote columns and normalise their values.

    Steps, in order: rename the columns, replace the ``'-'`` placeholder with 0
    in the listed columns, coerce the listed columns to numbers (unparseable
    values become NaN), derive ``large_order_net_inflow_ratio`` and
    ``disk_ratio``, then sort by ``chg`` in descending order.

    :param temp_df: DataFrame holding the raw ``f*`` columns.
    :return: the cleaned, sorted DataFrame, or a new empty DataFrame when
        ``data_frame_util.is_empty`` reports the input as empty.
    """
    # Raw field code -> readable column name. f78, f84 and f103 are not mapped
    # (their entries were commented out in the original mapping).
    column_names = {
        "f2": "now_price",
        "f3": "chg",
        "f5": "volume",
        "f6": "amount",
        "f8": "exchange",
        "f10": "quantity_ratio",
        "f22": "up_speed",
        "f11": "up_speed_05",
        "f12": "symbol",
        "f14": "name",
        "f15": "high",
        "f16": "low",
        "f17": "open",
        "f18": "yesterday_price",
        "f20": "total_mv",
        "f21": "flow_mv",
        "f26": "list_date",
        "f33": "wei_bi",
        "f34": "outer_disk",
        "f35": "inner_disk",
        "f62": "today_main_net_inflow",
        "f66": "super_large_order_net_inflow",
        "f69": "super_large_order_net_inflow_ratio",
        "f72": "large_order_net_inflow",
        "f100": "industry",
        "f184": "today_main_net_inflow_ratio",
        "f352": "average_price",
        "f211": "buy_1_num",
        "f212": "sell_1_num",
    }
    quotes_df = temp_df.rename(columns=column_names)
    if data_frame_util.is_empty(quotes_df):
        return pd.DataFrame()

    # Columns in which the '-' placeholder is replaced by 0.
    dash_columns = (
        'buy_1_num', 'sell_1_num', 'up_speed_05', 'up_speed',
        'average_price', 'wei_bi', 'yesterday_price', 'now_price',
        'chg', 'volume', 'amount', 'exchange', 'quantity_ratio',
        'high', 'low', 'open', 'total_mv', 'flow_mv',
        'inner_disk', 'outer_disk',
        'today_main_net_inflow_ratio', 'today_main_net_inflow',
        'super_large_order_net_inflow',
        'super_large_order_net_inflow_ratio',
        'large_order_net_inflow',
    )
    for column in dash_columns:
        is_dash = quotes_df[column] == '-'
        quotes_df.loc[is_dash, column] = 0

    # Columns coerced to numeric. This list is not the same as the one above:
    # list_date is only here, and some '-'-cleaned columns are not converted.
    numeric_columns = (
        "list_date", "wei_bi", "average_price", "yesterday_price",
        "now_price", "chg", "volume", "amount", "exchange",
        "quantity_ratio", "high", "low", "open", "total_mv", "flow_mv",
        "outer_disk", "inner_disk", "today_main_net_inflow",
        "super_large_order_net_inflow",
        "super_large_order_net_inflow_ratio",
        "large_order_net_inflow",
    )
    for column in numeric_columns:
        quotes_df[column] = pd.to_numeric(quotes_df[column], errors="coerce")

    # Large-order net inflow as a percentage of the traded amount.
    inflow_share = quotes_df['large_order_net_inflow'] / quotes_df['amount']
    quotes_df['large_order_net_inflow_ratio'] = (inflow_share * 100).round(2)

    # Outer disk relative to inner disk: (outer - inner) / inner.
    disk_gap = quotes_df['outer_disk'] - quotes_df['inner_disk']
    quotes_df['disk_ratio'] = (disk_gap / quotes_df['inner_disk']).round(2)
    # Rows with no inner disk at all get the fixed marker value 1688.
    quotes_df.loc[quotes_df["inner_disk"] == 0, ['disk_ratio']] = 1688

    return quotes_df.sort_values(by=['chg'], ascending=False)
353
-
354
-
355
if __name__ == '__main__':
    # Manual run: keep fetching the full quote table and print it, forever.
    # Arguments below: 5800 expected rows, 6 threads, 10 pages per thread.
    time_out = 10  # timeout passed to each page request
    while True:
        result = repeated_acquisition_ask_async(time_out, 5800, 6, 10)
        print(result)