reyfetch 1.0.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reyfetch/__init__.py +21 -0
- reyfetch/rali.py +990 -0
- reyfetch/rall.py +19 -0
- reyfetch/rbaidu.py +467 -0
- reyfetch/rbase.py +243 -0
- reyfetch/rdouban.py +565 -0
- reyfetch/rgeneral.py +158 -0
- reyfetch/rsina.py +239 -0
- reyfetch/rtoutiao.py +71 -0
- reyfetch/rweibo.py +90 -0
- reyfetch-1.0.35.dist-info/METADATA +30 -0
- reyfetch-1.0.35.dist-info/RECORD +14 -0
- reyfetch-1.0.35.dist-info/WHEEL +4 -0
- reyfetch-1.0.35.dist-info/licenses/LICENSE +7 -0
reyfetch/rall.py
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# !/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
"""
|
5
|
+
@Time : 2022-12-08 13:11:09
|
6
|
+
@Author : Rey
|
7
|
+
@Contact : reyxbo@163.com
|
8
|
+
@Explain : All methods.
|
9
|
+
"""
|
10
|
+
|
11
|
+
|
12
|
+
from .rali import *
|
13
|
+
from .rbaidu import *
|
14
|
+
from .rbase import *
|
15
|
+
from .rdouban import *
|
16
|
+
from .rgeneral import *
|
17
|
+
from .rsina import *
|
18
|
+
from .rtoutiao import *
|
19
|
+
from .rweibo import *
|
reyfetch/rbaidu.py
ADDED
@@ -0,0 +1,467 @@
|
|
1
|
+
# !/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
"""
|
5
|
+
@Time : 2024-01-11 21:56:56
|
6
|
+
@Author : Rey
|
7
|
+
@Contact : reyxbo@163.com
|
8
|
+
@Explain : Baidu Web fetch methods.
|
9
|
+
"""
|
10
|
+
|
11
|
+
|
12
|
+
from typing import TypedDict
|
13
|
+
from enum import StrEnum
|
14
|
+
from reydb import rorm
|
15
|
+
from reydb.rdb import Database
|
16
|
+
from reykit.rbase import throw
|
17
|
+
from reykit.rnet import request as reykit_request
|
18
|
+
from reykit.ros import get_md5
|
19
|
+
from reykit.rrand import randn
|
20
|
+
from reykit.rtext import is_zh
|
21
|
+
from reykit.rtime import now
|
22
|
+
|
23
|
+
from .rbase import FetchRequest, FetchRequestWithDatabase, FetchRequestDatabaseRecord
|
24
|
+
|
25
|
+
|
26
|
+
# Public names exported when this module is star-imported.
__all__ = (
    'DatabaseTableBaiduTrans',
    'FetchRequestBaidu',
    'FetchRequestBaiduTranslateLangEnum',
    'FetchRequestBaiduTranslateLangAutoEnum',
    'FetchRequestBaiduTranslate',
    'crawl_baidu_trans'
)


# One element of the `trans_result` list in a translate API response;
# `FetchRequestBaiduTranslate.trans` reads its `dst` key.
FanyiResponseResult = TypedDict('FanyiResponseResult', {'src': str, 'dst': str})
# Response dictionary type annotated as the return of `FetchRequestBaiduTranslate.request`.
# Functional `TypedDict` syntax is required because `from` is a Python keyword.
FanyiResponse = TypedDict('FanyiResponse', {'from': str, 'to': str, 'trans_result': list[FanyiResponseResult]})
|
38
|
+
|
39
|
+
|
40
|
+
class DatabaseTableBaiduTrans(rorm.Model, table=True):
    """
    Database `baidu_trans` table model.

    One row holds one translate request; the column values are the ones
    `FetchRequestBaiduTranslate.trans` writes into its database record.
    """

    # Table name and table comment.
    __name__ = 'baidu_trans'
    __comment__ = 'Baidu API translate request record table.'
    # Auto key column.
    id: int = rorm.Field(rorm.types_mysql.INTEGER(unsigned=True), key_auto=True, comment='ID.')
    # Times taken immediately before and after the API request.
    request_time: rorm.Datetime = rorm.Field(not_null=True, comment='Request time.')
    response_time: rorm.Datetime = rorm.Field(not_null=True, comment='Response time.')
    # Width 6000 equals the default `max_len` of `FetchRequestBaiduTranslate.__init__`.
    input: str = rorm.Field(rorm.types.VARCHAR(6000), not_null=True, comment='Input original text.')
    output: str = rorm.Field(rorm.types.TEXT, not_null=True, comment='Output translation text.')
    # Width 4 fits the value 'auto' in addition to the language codes.
    input_lang: str = rorm.Field(rorm.types.VARCHAR(4), not_null=True, comment='Input original text language.')
    # Width 3 fits the longest language code of `FetchRequestBaiduTranslateLangEnum`.
    output_lang: str = rorm.Field(rorm.types.VARCHAR(3), not_null=True, comment='Output translation text language.')
|
54
|
+
|
55
|
+
|
56
|
+
class FetchRequestBaidu(FetchRequest):
    """
    Request Baidu API fetch type.

    Adds no members of its own; it is the common base of the Baidu
    fetch types defined in this module.
    """
|
60
|
+
|
61
|
+
|
62
|
+
class FetchRequestBaiduTranslateLangEnum(FetchRequestBaidu, StrEnum):
    """
    Request Baidu translate API language enumeration fetch type.

    Each member value is the language code string that
    `FetchRequestBaiduTranslate.request` sends as the `from` / `to` parameter.
    """

    ZH = 'zh'
    EN = 'en'
    YUE = 'yue'
    KOR = 'kor'
    TH = 'th'
    PT = 'pt'
    EL = 'el'
    BUL = 'bul'
    FIN = 'fin'
    SLO = 'slo'
    CHT = 'cht'
    WYW = 'wyw'
    FRA = 'fra'
    ARA = 'ara'
    DE = 'de'
    NL = 'nl'
    EST = 'est'
    CS = 'cs'
    SWE = 'swe'
    VIE = 'vie'
    JP = 'jp'
    SPA = 'spa'
    RU = 'ru'
    IT = 'it'
    PL = 'pl'
    DAN = 'dan'
    ROM = 'rom'
    HU ='hu'
|
95
|
+
|
96
|
+
|
97
|
+
class FetchRequestBaiduTranslateLangAutoEnum(FetchRequestBaidu, StrEnum):
    """
    Request Baidu translate API language auto enumeration fetch type.

    Kept separate from `FetchRequestBaiduTranslateLangEnum` because the
    annotations in `FetchRequestBaiduTranslate` accept it only as a source language.
    """

    # Used by `FetchRequestBaiduTranslate.trans` when the source language cannot be judged locally.
    AUTO = 'auto'
|
103
|
+
|
104
|
+
|
105
|
+
class FetchRequestBaiduTranslate(FetchRequestBaidu, FetchRequestWithDatabase):
    """
    Request Baidu translate API fetch type.
    Can create database used `self.build_db` method.

    Attributes
    ----------
    url_api : API request URL.
    url_doc : API document URL.
    LangEnum : Baidu Fanyi API language enumeration type.
    LangAutoEnum : Baidu Fanyi API language auto type enumeration.
    db_names : Database table name mapping dictionary.
    """

    # NOTE(review): this endpoint is plain HTTP, so `appid` and the signature are sent unencrypted;
    # confirm against `url_doc` whether an HTTPS endpoint should be used instead.
    url_api = 'http://api.fanyi.baidu.com/api/trans/vip/translate'
    url_doc = 'https://fanyi-api.baidu.com/product/113'
    LangEnum = FetchRequestBaiduTranslateLangEnum
    LangAutoEnum = FetchRequestBaiduTranslateLangAutoEnum
    db_names = {
        'baidu_trans': 'baidu_trans',
        'stats_baidu_trans': 'stats_baidu_trans'
    }


    def __init__(
        self,
        appid: str,
        appkey: str,
        db: Database | None = None,
        max_len: int = 6000
    ) -> None:
        """
        Build instance attributes.

        Parameters
        ----------
        appid : APP ID.
        appkey : APP key.
        db : `Database` instance, insert request record to table.
        max_len : Maximum text length accepted by `self.trans`.
        """

        # Build.
        self.appid = appid
        self.appkey = appkey
        self.db = db
        self.max_len = max_len

        # Database.
        self.db_record = FetchRequestDatabaseRecord(self, 'api', 'baidu_trans')


    def sign(self, text: str, num: int) -> str:
        """
        Get signature.

        Parameters
        ----------
        text : Text.
        num : Number.

        Returns
        -------
        Signature, MD5 of the concatenation of APP ID, text, number and APP key.
        """

        # Check.
        if text == '':
            throw(ValueError, text)

        # Parameter.
        num_str = str(num)

        # Sign.
        data = ''.join(
            (
                self.appid,
                text,
                num_str,
                self.appkey
            )
        )
        md5 = get_md5(data)

        return md5


    def request(
        self,
        text: str,
        from_lang: FetchRequestBaiduTranslateLangEnum | FetchRequestBaiduTranslateLangAutoEnum,
        to_lang: FetchRequestBaiduTranslateLangEnum
    ) -> FanyiResponse:
        """
        Request translate API.

        Parameters
        ----------
        text : Text.
        from_lang : Source language.
        to_lang : Target language.

        Returns
        -------
        Response dictionary.
        """

        # Parameter.
        rand_num = randn(32768, 65536)

        ## The same random number is used in the signature and sent as `salt`.
        sign = self.sign(text, rand_num)
        params = {
            'q': text,
            'from': from_lang.value,
            'to': to_lang.value,
            'appid': self.appid,
            'salt': rand_num,
            'sign': sign
        }
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}

        # Request.
        response = reykit_request(
            self.url_api,
            params,
            headers=headers,
            check=True
        )

        # Check.
        content_type = response.headers['Content-Type']
        if content_type.startswith('application/json'):
            response_json: dict = response.json()

            ## A JSON body containing `error_code` is treated as a failed request.
            if 'error_code' in response_json:
                throw(AssertionError, response_json)
        else:
            throw(AssertionError, content_type)

        return response_json


    def get_lang(self, text: str) -> FetchRequestBaiduTranslateLangEnum | None:
        """
        Judge and get text language type.
        The first character that is an ASCII letter or a Chinese character decides the result.

        Parameters
        ----------
        text : Text.

        Returns
        -------
        Language type or null.
        """

        # Handle parameter.
        en_chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

        # Judge.
        for char in text:
            if char in en_chars:
                return FetchRequestBaiduTranslateLangEnum.EN
            elif is_zh(char):
                return FetchRequestBaiduTranslateLangEnum.ZH

        return None


    def trans(
        self,
        text: str,
        from_lang: FetchRequestBaiduTranslateLangEnum | FetchRequestBaiduTranslateLangAutoEnum | None = None,
        to_lang: FetchRequestBaiduTranslateLangEnum | None = None
    ) -> str:
        """
        Translate.

        Parameters
        ----------
        text : Text, leading and trailing whitespace is stripped before use.
            Length after stripping must not exceed `self.max_len`.
        from_lang : Source language.
            - `None`: Automatic judgment.
        to_lang : Target language.
            - `None`: Automatic judgment, Chinese when source is English, otherwise English.

        Returns
        -------
        Translated text.
        """

        # Parameter.
        text = text.strip()

        # Check.

        ## Checked after stripping, because the stripped text is what is sent and recorded.
        text_len = len(text)
        if text_len > self.max_len:
            throw(AssertionError, self.max_len, text_len)

        # Language.
        if from_lang is None:
            from_lang = self.get_lang(text)
            from_lang = from_lang or FetchRequestBaiduTranslateLangAutoEnum.AUTO
        if to_lang is None:
            if from_lang == FetchRequestBaiduTranslateLangEnum.EN:
                to_lang = FetchRequestBaiduTranslateLangEnum.ZH
            else:
                to_lang = FetchRequestBaiduTranslateLangEnum.EN

        # Request.
        self.db_record['request_time'] = now()
        response_dict = self.request(text, from_lang, to_lang)
        self.db_record['response_time'] = now()

        # Extract.
        trans_text = '\n'.join(
            [
                trans_text_line_dict['dst']
                for trans_text_line_dict in response_dict['trans_result']
            ]
        )

        # Database.
        self.db_record['input'] = text
        self.db_record['output'] = trans_text
        self.db_record['input_lang'] = from_lang
        self.db_record['output_lang'] = to_lang
        self.db_record.record()

        return trans_text


    def build_db(self) -> None:
        """
        Check and build database tables, by `self.db_names`.
        """

        # Check.
        if self.db is None:
            throw(ValueError, self.db)

        # Parameter.

        ## Table.
        tables = [DatabaseTableBaiduTrans]
        table_name = self.db_names['baidu_trans']
        DatabaseTableBaiduTrans._set_name(table_name)

        ## View stats.

        ### Shared clause, built once so that no f-string has to reuse its own quote character.
        from_clause = f'FROM `{self.db.database}`.`{table_name}`'
        views_stats = [
            {
                'path': self.db_names['stats_baidu_trans'],
                'items': [
                    {
                        'name': 'count',
                        'select': (
                            'SELECT COUNT(1)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Request count.'
                    },
                    {
                        'name': 'past_day_count',
                        'select': (
                            'SELECT COUNT(1)\n'
                            f'{from_clause}\n'
                            'WHERE TIMESTAMPDIFF(DAY, `request_time`, NOW()) = 0'
                        ),
                        'comment': 'Request count in the past day.'
                    },
                    {
                        'name': 'past_week_count',
                        'select': (
                            'SELECT COUNT(1)\n'
                            f'{from_clause}\n'
                            'WHERE TIMESTAMPDIFF(DAY, `request_time`, NOW()) <= 6'
                        ),
                        'comment': 'Request count in the past week.'
                    },
                    {
                        'name': 'past_month_count',
                        'select': (
                            'SELECT COUNT(1)\n'
                            f'{from_clause}\n'
                            'WHERE TIMESTAMPDIFF(DAY, `request_time`, NOW()) <= 29'
                        ),
                        'comment': 'Request count in the past month.'
                    },
                    {
                        'name': 'total_input',
                        'select': (
                            'SELECT FORMAT(SUM(LENGTH(`input`)), 0)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Input original text total character.'
                    },
                    {
                        'name': 'total_output',
                        'select': (
                            'SELECT FORMAT(SUM(LENGTH(`output`)), 0)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Output translation text total character.'
                    },
                    {
                        'name': 'avg_input',
                        'select': (
                            'SELECT FORMAT(AVG(LENGTH(`input`)), 0)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Input original text average character.'
                    },
                    {
                        'name': 'avg_output',
                        'select': (
                            'SELECT FORMAT(AVG(LENGTH(`output`)), 0)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Output translation text average character.'
                    },
                    {
                        'name': 'last_time',
                        'select': (
                            'SELECT MAX(`request_time`)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Last record request time.'
                    }
                ]
            }
        ]

        # Build.
        self.db.build.build(tables=tables, views_stats=views_stats, skip=True)


    __call__ = trans
|
436
|
+
|
437
|
+
|
438
|
+
def crawl_baidu_trans(text: str) -> str | None:
    """
    Crawl baidu translate text.

    Parameters
    ----------
    text : Text to be translated.

    Returns
    -------
    Translated text, or null when the response `data` is empty.
    """

    # Parameter.
    url = 'https://fanyi.baidu.com/sug'
    data = {
        'kw': text
    }

    # Requests.
    response = reykit_request(url, data)
    response_data = response.json()['data']

    # Handle result.
    if not response_data:
        return None

    ## Only the first entry is used: keep the part before the first ';',
    ## then drop everything up to the last '. ' in it.
    translate_data = response_data[0]['v']
    translate_text = translate_data.split(';')[0].split('. ')[-1]

    return translate_text
|