reyfetch 1.0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reyfetch/rgeneral.py ADDED
@@ -0,0 +1,158 @@
1
+ # !/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ @Time : 2024-01-10 21:57:08
6
+ @Author : Rey
7
+ @Contact : reyxbo@163.com
8
+ @Explain : General methods.
9
+ """
10
+
11
+
12
+ from json import loads as json_loads
13
+ from reykit.rnet import request
14
+ from reykit.rre import search
15
+ from reykit.rtime import now
16
+
17
+
18
+ __all__ = (
19
+ 'crawl_calendar',
20
+ 'crawl_lunar_calendar'
21
+ )
22
+
23
+
24
+ def crawl_calendar(
25
+ year: int | None = None,
26
+ month: int | None = None
27
+ ) -> list[dict]:
28
+ """
29
+ Crawl Baidu Web calendar table.
30
+
31
+ Parameters
32
+ ----------
33
+ year : Given year.
34
+ - `None`: Now year.
35
+ month : Given month.
36
+ - `None`: Now month.
37
+
38
+ Returns
39
+ -------
40
+ Calendar table.
41
+ """
42
+
43
+ # Get parameter.
44
+ now_date = now('date')
45
+ year = year or now_date.year
46
+ month = month or now_date.month
47
+ if month == 12:
48
+ month = 1
49
+ else:
50
+ month += 1
51
+ url = 'https://opendata.baidu.com/data/inner'
52
+ query = '%s年%s月' % (year, month)
53
+ params = {
54
+ 'tn': 'reserved_all_res_tn',
55
+ 'type': 'json',
56
+ 'resource_id': '52109',
57
+ 'query': query,
58
+ 'apiType': 'yearMonthData',
59
+ 'cb': 'jsonp_1706670926975_94318'
60
+ }
61
+
62
+ # Request.
63
+ response = request(url, params)
64
+
65
+ # Extract.
66
+ pattern = '{.+}'
67
+ text = search(pattern, response.text)
68
+ data: dict = json_loads(text)
69
+ table: list[dict] = data['Result'][0]['DisplayData']['resultData']['tplData']['data']['almanac']
70
+
71
+ # Convert.
72
+ week_dict = {
73
+ '一': 0,
74
+ '二': 1,
75
+ '三': 2,
76
+ '四': 3,
77
+ '五': 4,
78
+ '六': 5,
79
+ '日': 6
80
+ }
81
+ table = [
82
+ {
83
+ 'year': int(row['year']),
84
+ 'month': int(row['month']),
85
+ 'day': int(row['day']),
86
+ 'week': week_dict[row['cnDay']],
87
+ 'work': row.get('status'),
88
+ 'festival': [
89
+ {
90
+ 'name': info['name'],
91
+ 'url': info.get('baikeUrl')
92
+ }
93
+ for info in row.get('festivalInfoList', [])
94
+ ],
95
+ 'animal': row['animal'],
96
+ 'lunar_year': int(row['lunarYear']),
97
+ 'lunar_month': int(row['lunarMonth']),
98
+ 'lunar_day': int(row['lunarDate']),
99
+ 'gz_year': row['gzYear'],
100
+ 'gz_month': row['gzMonth'],
101
+ 'gz_day': row['gzDate'],
102
+ 'suit': row['suit'].split('.'),
103
+ 'avoid': row['avoid'].split('.'),
104
+ 'url': row['yjJumpUrl']
105
+ }
106
+ for row in table
107
+ ]
108
+ for row in table:
109
+ week = row['week']
110
+ work = row['work']
111
+ match work:
112
+ case None:
113
+ is_work_day = week not in (5, 6)
114
+ case '1':
115
+ is_work_day = False
116
+ case '2':
117
+ is_work_day = True
118
+ row['work'] = is_work_day
119
+
120
+ return table
121
+
122
+
123
def crawl_lunar_calendar(
    year: int | None = None,
    month: int | None = None
) -> list[dict]:
    """
    Crawl Rili Web lunar calendar table.

    Parameters
    ----------
    year : Given year.
        - `None`: Now year.
    month : Given month.
        - `None`: Now month.

    Returns
    -------
    Lunar calendar table.
    """

    # Default to the current year and month.
    today = now('date')
    year = year or today.year
    month = month or today.month

    # Monthly JSON endpoint; the '_' parameter busts intermediate caches.
    url = 'https://www.rili.com.cn/rili/json/pc_wnl/%s/%02d.js' % (year, month)
    params = {'_': now('timestamp')}

    # Fetch the resource.
    response = request(url, params)

    # Strip the JS wrapper and parse the embedded JSON object.
    matched = search('{.+}', response.text)
    payload = json_loads(matched)
    return payload['data']
reyfetch/rsina.py ADDED
@@ -0,0 +1,239 @@
1
+ # !/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ @Time : 2024-01-22 14:06:05
6
+ @Author : Rey
7
+ @Contact : reyxbo@163.com
8
+ @Explain : Sina Web fetch methods.
9
+ """
10
+
11
+
12
+ from typing import TypedDict, Literal
13
+ from reykit.rbase import throw
14
+ from reykit.rnet import request
15
+ from reykit.rre import search, findall, sub
16
+ from reykit.rtime import now
17
+
18
+
19
+ __all__ = (
20
+ 'crawl_sina_search_market',
21
+ 'crawl_sina_stock_info'
22
+ )
23
+
24
+
25
class SinaStockInfo(TypedDict):
    """Stock information row returned by `crawl_sina_stock_info`."""

    code: str
    name: str
    price: float
    open: float
    pre_close: float
    high: float
    low: float
    volume: int
    amount: int
    time: str
    url: str
    change: float
    change_rate: float
    swing: float
44
+
45
+
46
def crawl_sina_search_market(keyword: str) -> list[dict[Literal['code', 'name', 'type', 'url'], str]]:
    """
    Crawl Sina Web and search market product table.

    Parameters
    ----------
    keyword : Search keyword.

    Returns
    -------
    Search result table, one row per matched product with keys
    'code', 'name', 'type' and 'url'.
    """

    # Parameter.
    url = 'https://biz.finance.sina.com.cn/suggest/lookup_n.php'
    params = {
        'country': '',
        'q': keyword
    }

    # Request.
    response = request(
        url,
        params,
        check=True
    )

    # Unique result: when the keyword matches exactly one A-share stock, the
    # site redirects straight to the stock page (finance.sina.com.cn host);
    # scrape code/name from its embedded JS variables and return one row.
    if response.request.url.startswith("https://finance.sina.com.cn"):
        pattern = "var papercode = '(.+?)'"
        stock_code = search(pattern, response.text)
        pattern = "var stockname = '(.+?)'"
        stock_name = search(pattern, response.text)
        row = {
            'code': stock_code,
            'name': stock_name,
            'type': '沪深股市(个股)',
            'url': response.request.url
        }
        table = [row]
        return table

    # Extract: result page alternates 'market' divs (category headers) with
    # 'list' divs (entries). Each 'list' div is paired with the immediately
    # preceding div via `index - 1`, so the order of findall matches matters.
    pattern = '<div class="(market|list)"(.+?)</div>'
    labels_result: list[tuple[str, str]] = findall(pattern, response.text)
    table = []
    for index, (label_class, div_text) in enumerate(labels_result):
        if label_class != 'list':
            continue
        # The category name is the text after the last inner <div> of the
        # preceding header block.
        stock_type_div_text = labels_result[index - 1][1]
        stock_type = stock_type_div_text.rsplit('<div>', 1)[1]
        pattern = '<label><a href="([^"]+)" target="_blank">(.+?)</label>'
        stocks_result = findall(pattern, div_text)
        for stock_url, stock_text in stocks_result:
            # Strip remaining HTML tags; presumably reykit's `sub` with no
            # replacement deletes matches — TODO confirm its signature.
            pattern = '<.+?>'
            stock_info = sub(pattern, stock_text)
            # Expect "CODE NAME"; skip entries that do not split into two.
            stock_info_split = stock_info.split(maxsplit=1)
            if len(stock_info_split) != 2:
                continue
            stock_code, stock_name = stock_info_split
            # Some names are wrapped in parentheses; unwrap them.
            if stock_name.startswith('('):
                stock_name = stock_name[1:-1]
            row = {
                'code': stock_code,
                'name': stock_name,
                'type': stock_type,
                'url': stock_url
            }
            table.append(row)

    return table
117
+
118
+
119
+ def crawl_sina_stock_info(code: str | list[str]) -> list[SinaStockInfo]:
120
+ """
121
+ Crawl Sina Web stock information.
122
+
123
+ Parameters
124
+ ----------
125
+ code : Stock code.
126
+
127
+ Returns
128
+ -------
129
+ Stock information table.
130
+ """
131
+
132
+ # Parameter.
133
+ if type(code) == str:
134
+ code = code.split(',')
135
+ code = [
136
+ (
137
+ i
138
+ if i[-1] in '0123456789'
139
+ else 'gb_' + i.replace('.', '$')
140
+ )
141
+ for i in code
142
+ ]
143
+ code = ','.join(code)
144
+ code = code.lower()
145
+ url = 'https://hq.sinajs.cn/rn=%s&list=%s' % (
146
+ now('timestamp'),
147
+ code
148
+ )
149
+ headers = {'Referer': 'https://finance.sina.com.cn'}
150
+
151
+ # Request.
152
+ response = request(
153
+ url,
154
+ headers=headers,
155
+ check=True
156
+ )
157
+
158
+ # Extract.
159
+ pattern = '([^_]+?)="([^"]*)"'
160
+ result: list[tuple[str, str]] = findall(pattern, response.text)
161
+ table = []
162
+ for code, info in result:
163
+ info_list = info.split(',')
164
+ info_list_len = len(info_list)
165
+ match info_list_len:
166
+
167
+ ## A.
168
+ case 34:
169
+ (
170
+ stock_name,
171
+ stock_open,
172
+ stock_pre_close,
173
+ stock_price,
174
+ stock_high,
175
+ stock_low,
176
+ _,
177
+ _,
178
+ stock_volume,
179
+ stock_amount,
180
+ *_,
181
+ stock_date,
182
+ stock_time,
183
+ _,
184
+ _
185
+ ) = info_list
186
+ row = {
187
+ 'code': code,
188
+ 'name': stock_name,
189
+ 'price': float(stock_price),
190
+ 'open': float(stock_open),
191
+ 'pre_close': float(stock_pre_close),
192
+ 'high': float(stock_high),
193
+ 'low': float(stock_low),
194
+ 'volume': int(float(stock_volume)),
195
+ 'amount': int(float(stock_amount)),
196
+ 'time': '%s %s' % (stock_date, stock_time),
197
+ 'url': 'https://finance.sina.com.cn/realstock/company/%s/nc.shtml' % code
198
+ }
199
+
200
+ # US.
201
+ case 36 | 30:
202
+ (
203
+ stock_name,
204
+ stock_price,
205
+ _,
206
+ stock_date_time,
207
+ _,
208
+ stock_open,
209
+ stock_high,
210
+ stock_low,
211
+ _, _,
212
+ stock_amount,
213
+ _, _, _, _, _, _, _, _, _, _, _, _, _, _, _,
214
+ stock_pre_close,
215
+ *_
216
+ ) = info_list
217
+ row = {
218
+ 'code': code,
219
+ 'name': stock_name,
220
+ 'price': float(stock_price),
221
+ 'open': float(stock_open),
222
+ 'pre_close': float(stock_pre_close),
223
+ 'high': float(stock_high),
224
+ 'low': float(stock_low),
225
+ 'amount': int(float(stock_amount)),
226
+ 'time': stock_date_time,
227
+ 'url': 'https://stock.finance.sina.com.cn/usstock/quotes/%s.html' % code.replace('$', '.')
228
+ }
229
+
230
+ ## Throw exception.
231
+ case _:
232
+ throw(AssertionError, info)
233
+
234
+ row['change'] = round(row['price'] - row['pre_close'], 4)
235
+ row['change_rate'] = round(row['change'] / row['pre_close'] * 100, 4)
236
+ row['swing'] = round((row['high'] - row['low']) / row['high'] * 100, 4)
237
+ table.append(row)
238
+
239
+ return table
reyfetch/rtoutiao.py ADDED
@@ -0,0 +1,71 @@
1
+ # !/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ @Time : 2024-01-22 14:06:05
6
+ @Author : Rey
7
+ @Contact : reyxbo@163.com
8
+ @Explain : Toutiao Web fetch methods.
9
+ """
10
+
11
+
12
+ from typing import Any, Literal
13
+ from reykit.rnet import request
14
+
15
+
16
+ __all__ = (
17
+ 'crawl_toutiao_hot_search',
18
+ )
19
+
20
+
21
def crawl_toutiao_hot_search() -> list[dict[Literal['title', 'type', 'label', 'hot', 'url', 'image'], Any]]:
    """
    Crawl Toutiao Web hot search table.

    Returns
    -------
    Hot search table.
    - `Key 'title'`: Hot search title.
    - `Key 'type'`: Hot search type list.
    - `Key 'label'`: Hot search label.
    - `Key 'hot'`: Hot search hot value.
    - `Key 'url'`: Hot search URL.
    - `Key 'image'`: Hot search image URL.
    """

    # Request the PC hot-board API.
    response = request(
        'https://www.toutiao.com/hot-event/hot-board/',
        {'origin': 'toutiao_pc'},
        check=True
    )

    # Extract the raw item list.
    items: list[dict] = response.json()['data']

    # Normalize each item to the documented row shape.
    rows = []
    for info in items:
        rows.append(
            {
                'title': info['Title'],
                'type': info.get('InterestCategory'),
                'label': info.get('LabelDesc'),
                'hot': int(info['HotValue']),
                'url': info['Url'],
                'image': info['Image']['url'],
            }
        )

    # Order by hot value, descending, then prepend the rank index.
    rows.sort(key=lambda row: row['hot'], reverse=True)
    return [
        {'rank': rank, **row}
        for rank, row in enumerate(rows)
    ]
reyfetch/rweibo.py ADDED
@@ -0,0 +1,90 @@
1
+ # !/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ @Time : 2024-01-22 14:06:05
6
+ @Author : Rey
7
+ @Contact : reyxbo@163.com
8
+ @Explain : Weibo Web fetch methods.
9
+ """
10
+
11
+
12
+ from typing import Any, Literal
13
+ from fake_useragent import UserAgent
14
+ from reykit.rnet import request, join_url
15
+ from reykit.rtime import now
16
+
17
+
18
+ __all__ = (
19
+ 'crawl_weibo_hot_search',
20
+ )
21
+
22
+
23
def crawl_weibo_hot_search() -> list[dict[Literal['rank', 'title', 'hot', 'url'], Any]]:
    """
    Crawl Weibo Web hot search table.

    Returns
    -------
    Hot search table.
    - `Key 'rank'`: Hot search rank.
    - `Key 'title'`: Hot search title.
    - `Key 'hot'`: Hot search hot value.
    - `Key 'url'`: Hot search URL.
    """

    # Request.
    url = 'https://weibo.com/ajax/side/searchBand'
    timestamp_second = now('timestamp_s')
    params = {
        'type': 'hot',
        'last_tab': 'hot',
        'last_table_time': timestamp_second
    }
    ua = UserAgent()
    # NOTE(review): the cookie below is a hard-coded guest session; it will
    # expire or be invalidated, silently breaking this endpoint — consider
    # making it configurable or fetching a fresh session.
    headers = {
        'cookie': (
            'SUB=_2AkMf61vxf8NxqwFRmvgTzm_la4RxzAvEieKpt6oqJRMxHRl-yT9yqmIEtRB6NGt1HrGel2jwtm1TPoj0LB2qbH5Djjty; '
            'SUBP=0033WrSXqPxfM72-Ws9jqgMF55529P9D9W5LjsD9P67XdiTS.eBzcX8n; '
            'XSRF-TOKEN=JUL_aQ7hlSuI98dYZDxZdYNV; '
            'WBPSESS=9-DrhgMbGnVf8No6y5BLAa-AdtUBbe2eTM9RR6Vd3EQO6R5LLxnh_NKkxuJ_a9m2rFeEEGrQEIgK1oe4gs2SnXWX_ZT5_XC9csUnNHL-q-ZJLzj9wbKvtMB4ZYVnfrM8'
        ),
        'referer': 'https://weibo.com/newlogin?tabtype=weibo&gid=102803&openLoginLayer=0&url=https%3A%2F%2Fwww.weibo.com%2F',
        'user-agent': ua.edge,
    }
    response = request(url, params, headers=headers, check=True)

    # Extract the realtime hot-search item list.
    response_json = response.json()
    table: list[dict] = response_json['data']['realtime']

    # Convert: keep only items carrying a 'flag' field (presumably filters
    # out pinned/promoted entries — confirm against the API response), and
    # build a search URL for each title.
    table = [
        {
            'title': info['word'],
            'hot': info['num'],
            'url': join_url(
                'https://s.weibo.com/weibo',
                {'q': '#%s#' % info['word']}
            )
        }
        for info in table
        if 'flag' in info
    ]

    # Sort by hot value, descending; a missing (None) hot value sorts last.
    sort_key = lambda row: (
        0
        if row['hot'] is None
        else row['hot']
    )
    table.sort(key=sort_key, reverse=True)

    # Prepend the 0-based rank index.
    table = [
        {
            'rank': index,
            **row
        }
        for index, row in enumerate(table)
    ]

    return table
@@ -0,0 +1,30 @@
1
+ Metadata-Version: 2.4
2
+ Name: reyfetch
3
+ Version: 1.0.35
4
+ Summary: Web data fetch method set.
5
+ Project-URL: homepage, https://github.com/reyxbo/reyfetch/
6
+ Author-email: Rey <reyxbo@163.com>
7
+ License: Copyright 2025 ReyXBo
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
14
+ License-File: LICENSE
15
+ Keywords: crawl Web,fetch,request API,rey,reyxbo
16
+ Requires-Python: >=3.12
17
+ Requires-Dist: fake-useragent
18
+ Requires-Dist: reykit
19
+ Requires-Dist: selenium
20
+ Description-Content-Type: text/markdown
21
+
22
+ # reyfetch
23
+
24
+ > Web data fetch method set.
25
+
26
+ ## Install
27
+
28
+ ```
29
+ pip install reyfetch
30
+ ```
@@ -0,0 +1,14 @@
1
+ reyfetch/__init__.py,sha256=gDkQmFLZGToSqb1HVfvGxvc9t3mt8slg7_MVc44llns,484
2
+ reyfetch/rali.py,sha256=fdwraDilYcSnsCdV3FaLAT_g-legTufm0H73x8OQkx0,33667
3
+ reyfetch/rall.py,sha256=86TMZiNsv-UinZ2L3_m3ugTHekUWnfVlWMedte0MOwQ,350
4
+ reyfetch/rbaidu.py,sha256=vwVHiWxeaKnklGcORH2b4njmnmtAdWclSqDdJFNRirY,14023
5
+ reyfetch/rbase.py,sha256=Q9FldtjDHy3GSgGBv1qzheFKVBnQB9UvwFswbv6dQDA,5021
6
+ reyfetch/rdouban.py,sha256=X2wGolwPiYXHBJj06_u2dfNSKDtB68qJQvmij1ysUn8,19372
7
+ reyfetch/rgeneral.py,sha256=s270rtlVv1owkGF57Tjs8Oc5bJRD_XtWISpKlEJ7mbE,3797
8
+ reyfetch/rsina.py,sha256=WONT6NN67seYjlvXZ39RbibticdDAuauGk6SAvtdEq8,6889
9
+ reyfetch/rtoutiao.py,sha256=zLPnf2_XSC0d2RZgtMVoVyWpA2UDGNXYpRjYi3DGabI,1671
10
+ reyfetch/rweibo.py,sha256=T58q6wOv0p7j6WBH4we5k9x8IgFhQN__wqsqTHYMbmU,2522
11
+ reyfetch-1.0.35.dist-info/METADATA,sha256=JF2TLmNHfJ1mv5LHy8wIn6CvyV91RmP2bGA0SRFI9Mw,1589
12
+ reyfetch-1.0.35.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
13
+ reyfetch-1.0.35.dist-info/licenses/LICENSE,sha256=UYLPqp7BvPiH8yEZduJqmmyEl6hlM3lKrFIefiD4rvk,1059
14
+ reyfetch-1.0.35.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,7 @@
1
+ Copyright 2025 ReyXBo
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.