reyfetch 1.0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reyfetch/rall.py ADDED
@@ -0,0 +1,19 @@
1
+ # !/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ @Time : 2022-12-08 13:11:09
6
+ @Author : Rey
7
+ @Contact : reyxbo@163.com
8
+ @Explain : All methods.
9
+ """
10
+
11
+
12
+ from .rali import *
13
+ from .rbaidu import *
14
+ from .rbase import *
15
+ from .rdouban import *
16
+ from .rgeneral import *
17
+ from .rsina import *
18
+ from .rtoutiao import *
19
+ from .rweibo import *
reyfetch/rbaidu.py ADDED
@@ -0,0 +1,467 @@
1
+ # !/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ @Time : 2024-01-11 21:56:56
6
+ @Author : Rey
7
+ @Contact : reyxbo@163.com
8
+ @Explain : Baidu Web fetch methods.
9
+ """
10
+
11
+
12
+ from typing import TypedDict
13
+ from enum import StrEnum
14
+ from reydb import rorm
15
+ from reydb.rdb import Database
16
+ from reykit.rbase import throw
17
+ from reykit.rnet import request as reykit_request
18
+ from reykit.ros import get_md5
19
+ from reykit.rrand import randn
20
+ from reykit.rtext import is_zh
21
+ from reykit.rtime import now
22
+
23
+ from .rbase import FetchRequest, FetchRequestWithDatabase, FetchRequestDatabaseRecord
24
+
25
+
26
+ __all__ = (
27
+ 'DatabaseTableBaiduTrans',
28
+ 'FetchRequestBaidu',
29
+ 'FetchRequestBaiduTranslateLangEnum',
30
+ 'FetchRequestBaiduTranslateLangAutoEnum',
31
+ 'FetchRequestBaiduTranslate',
32
+ 'crawl_baidu_trans'
33
+ )
34
+
35
+
# One translated segment of a Baidu translate API response:
# `src` is the original text, `dst` is the translated text.
FanyiResponseResult = TypedDict(
    'FanyiResponseResult',
    {
        'src': str,
        'dst': str
    }
)

# Successful Baidu translate API response body.
# The functional `TypedDict` form is required because `from` is a Python keyword.
FanyiResponse = TypedDict(
    'FanyiResponse',
    {
        'from': str,
        'to': str,
        'trans_result': list[FanyiResponseResult]
    }
)
38
+
39
+
class DatabaseTableBaiduTrans(rorm.Model, table=True):
    """
    Database `baidu_trans` table model.
    Holds the request records of the Baidu translate API.
    """

    # Table name and table comment.
    __name__ = 'baidu_trans'
    __comment__ = 'Baidu API translate request record table.'

    # Auto increment primary key.
    id: int = rorm.Field(
        rorm.types_mysql.INTEGER(unsigned=True),
        key_auto=True,
        comment='ID.'
    )

    # Request and response time.
    request_time: rorm.Datetime = rorm.Field(
        not_null=True,
        comment='Request time.'
    )
    response_time: rorm.Datetime = rorm.Field(
        not_null=True,
        comment='Response time.'
    )

    # Original text and translated text.
    input: str = rorm.Field(
        rorm.types.VARCHAR(6000),
        not_null=True,
        comment='Input original text.'
    )
    output: str = rorm.Field(
        rorm.types.TEXT,
        not_null=True,
        comment='Output translation text.'
    )

    # Language codes, width 4 fits `auto`, width 3 fits the longest target code.
    input_lang: str = rorm.Field(
        rorm.types.VARCHAR(4),
        not_null=True,
        comment='Input original text language.'
    )
    output_lang: str = rorm.Field(
        rorm.types.VARCHAR(3),
        not_null=True,
        comment='Output translation text language.'
    )
54
+
55
+
class FetchRequestBaidu(FetchRequest):
    """
    Base fetch type of Baidu API requests.
    Adds no behavior, only groups the Baidu fetch types under one parent.
    """
60
+
61
+
class FetchRequestBaiduTranslateLangEnum(FetchRequestBaidu, StrEnum):
    """
    Request Baidu translate API language enumeration fetch type.
    Each member value is the language code string sent to the API.
    """

    # Member order is kept as declared, it is the enumeration iteration order.
    ZH = 'zh'
    EN = 'en'
    YUE = 'yue'
    KOR = 'kor'
    TH = 'th'
    PT = 'pt'
    EL = 'el'
    BUL = 'bul'
    FIN = 'fin'
    SLO = 'slo'
    CHT = 'cht'
    WYW = 'wyw'
    FRA = 'fra'
    ARA = 'ara'
    DE = 'de'
    NL = 'nl'
    EST = 'est'
    CS = 'cs'
    SWE = 'swe'
    VIE = 'vie'
    JP = 'jp'
    SPA = 'spa'
    RU = 'ru'
    IT = 'it'
    PL = 'pl'
    DAN = 'dan'
    ROM = 'rom'
    HU = 'hu'
95
+
96
+
class FetchRequestBaiduTranslateLangAutoEnum(FetchRequestBaidu, StrEnum):
    """
    Request Baidu translate API language auto enumeration fetch type.
    Its single member is the source language value used when no language is specified.
    """

    AUTO = 'auto'
103
+
104
+
class FetchRequestBaiduTranslate(FetchRequestBaidu, FetchRequestWithDatabase):
    """
    Request Baidu translate API fetch type.
    Can create database used `self.build_db` method.

    Attributes
    ----------
    url_api : API request URL.
    url_doc : API document URL.
    LangEnum : Baidu Fanyi API language enumeration type.
    LangAutoEnum : Baidu Fanyi API language auto type enumeration.
    db_names : Database table name mapping dictionary.
    """

    # NOTE(review): plain HTTP endpoint, `appid` and the signature travel unencrypted;
    # confirm against `url_doc` whether the HTTPS endpoint should be used instead.
    url_api = 'http://api.fanyi.baidu.com/api/trans/vip/translate'
    url_doc = 'https://fanyi-api.baidu.com/product/113'
    LangEnum = FetchRequestBaiduTranslateLangEnum
    LangAutoEnum = FetchRequestBaiduTranslateLangAutoEnum
    db_names = {
        'baidu_trans': 'baidu_trans',
        'stats_baidu_trans': 'stats_baidu_trans'
    }


    def __init__(
        self,
        appid: str,
        appkey: str,
        db: Database | None = None,
        max_len: int = 6000
    ) -> None:
        """
        Build instance attributes.

        Parameters
        ----------
        appid : APP ID.
        appkey : APP key.
        db : `Database` instance, insert request record to table.
        max_len : Maximum length of text to be translated.
        """

        # Build.
        self.appid = appid
        self.appkey = appkey
        self.db = db
        self.max_len = max_len

        # Database.
        self.db_record = FetchRequestDatabaseRecord(self, 'api', 'baidu_trans')


    def sign(self, text: str, num: int) -> str:
        """
        Get signature, the MD5 of `appid + text + num + appkey`.

        Parameters
        ----------
        text : Text.
        num : Number, the salt of request.

        Returns
        -------
        Signature.
        """

        # Check.
        if text == '':
            throw(ValueError, text)

        # Sign.
        data = ''.join(
            (
                self.appid,
                text,
                str(num),
                self.appkey
            )
        )
        md5 = get_md5(data)

        return md5


    def request(
        self,
        text: str,
        from_lang: FetchRequestBaiduTranslateLangEnum | FetchRequestBaiduTranslateLangAutoEnum,
        to_lang: FetchRequestBaiduTranslateLangEnum
    ) -> FanyiResponse:
        """
        Request translate API.

        Parameters
        ----------
        text : Text.
        from_lang : Source language.
        to_lang : Target language.

        Returns
        -------
        Response dictionary.
        """

        # Parameter.
        rand_num = randn(32768, 65536)
        sign = self.sign(text, rand_num)
        params = {
            'q': text,
            'from': from_lang.value,
            'to': to_lang.value,
            'appid': self.appid,
            'salt': rand_num,
            'sign': sign
        }
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}

        # Request.
        response = reykit_request(
            self.url_api,
            params,
            headers=headers,
            check=True
        )

        # Check.

        ## Content type.
        content_type = response.headers['Content-Type']
        if not content_type.startswith('application/json'):
            throw(AssertionError, content_type)

        ## Error code, only present in a failed response.
        response_json: dict = response.json()
        if 'error_code' in response_json:
            throw(AssertionError, response_json)

        return response_json


    def get_lang(self, text: str) -> FetchRequestBaiduTranslateLangEnum | None:
        """
        Judge and get text language type, decided by the first character that is an English letter or Chinese.

        Parameters
        ----------
        text : Text.

        Returns
        -------
        Language type or null.
        """

        # Parameter.
        en_chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

        # Judge.
        for char in text:
            if char in en_chars:
                return FetchRequestBaiduTranslateLangEnum.EN
            if is_zh(char):
                return FetchRequestBaiduTranslateLangEnum.ZH

        # No English letter and no Chinese character.
        return None


    def trans(
        self,
        text: str,
        from_lang: FetchRequestBaiduTranslateLangEnum | FetchRequestBaiduTranslateLangAutoEnum | None = None,
        to_lang: FetchRequestBaiduTranslateLangEnum | None = None
    ) -> str:
        """
        Translate, and record the request by `self.db_record`.

        Parameters
        ----------
        text : Text, leading and trailing blank characters are removed before request.
            Length after removal cannot exceed `self.max_len`.
        from_lang : Source language.
            - `None`: Automatic judgment, by `self.get_lang`, fall back to `auto`.
        to_lang : Target language.
            - `None`: Automatic judgment, Chinese when source language is English, otherwise English.

        Returns
        -------
        Translated text, one line per translated segment.
        """

        # Parameter.
        text = text.strip()

        # Check.
        # Measured after strip, because the stripped text is what is requested and recorded.
        text_len = len(text)
        if text_len > self.max_len:
            throw(AssertionError, self.max_len, text_len)

        # Language.
        if from_lang is None:
            from_lang = self.get_lang(text)
            from_lang = from_lang or FetchRequestBaiduTranslateLangAutoEnum.AUTO
        if to_lang is None:
            if from_lang == FetchRequestBaiduTranslateLangEnum.EN:
                to_lang = FetchRequestBaiduTranslateLangEnum.ZH
            else:
                to_lang = FetchRequestBaiduTranslateLangEnum.EN

        # Request.
        self.db_record['request_time'] = now()
        response_dict = self.request(text, from_lang, to_lang)
        self.db_record['response_time'] = now()

        # Extract.
        trans_text = '\n'.join(
            trans_text_line_dict['dst']
            for trans_text_line_dict in response_dict['trans_result']
        )

        # Database.
        self.db_record['input'] = text
        self.db_record['output'] = trans_text
        self.db_record['input_lang'] = from_lang
        self.db_record['output_lang'] = to_lang
        self.db_record.record()

        return trans_text


    def build_db(self) -> None:
        """
        Check and build database tables, by `self.db_names`.
        """

        # Check.
        if self.db is None:
            throw(ValueError, self.db)

        # Parameter.
        table_name = self.db_names['baidu_trans']

        ## Table.
        tables = [DatabaseTableBaiduTrans]
        DatabaseTableBaiduTrans._set_name(table_name)

        ## View stats.
        # Built once outside the f-strings below, nested same type quotes need Python 3.12.
        from_clause = f'FROM `{self.db.database}`.`{table_name}`'
        views_stats = [
            {
                'path': self.db_names['stats_baidu_trans'],
                'items': [
                    {
                        'name': 'count',
                        'select': (
                            'SELECT COUNT(1)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Request count.'
                    },
                    {
                        'name': 'past_day_count',
                        'select': (
                            'SELECT COUNT(1)\n'
                            f'{from_clause}\n'
                            'WHERE TIMESTAMPDIFF(DAY, `request_time`, NOW()) = 0'
                        ),
                        'comment': 'Request count in the past day.'
                    },
                    {
                        'name': 'past_week_count',
                        'select': (
                            'SELECT COUNT(1)\n'
                            f'{from_clause}\n'
                            'WHERE TIMESTAMPDIFF(DAY, `request_time`, NOW()) <= 6'
                        ),
                        'comment': 'Request count in the past week.'
                    },
                    {
                        'name': 'past_month_count',
                        'select': (
                            'SELECT COUNT(1)\n'
                            f'{from_clause}\n'
                            'WHERE TIMESTAMPDIFF(DAY, `request_time`, NOW()) <= 29'
                        ),
                        'comment': 'Request count in the past month.'
                    },
                    {
                        'name': 'total_input',
                        'select': (
                            'SELECT FORMAT(SUM(LENGTH(`input`)), 0)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Input original text total character.'
                    },
                    {
                        'name': 'total_output',
                        'select': (
                            'SELECT FORMAT(SUM(LENGTH(`output`)), 0)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Output translation text total character.'
                    },
                    {
                        'name': 'avg_input',
                        'select': (
                            'SELECT FORMAT(AVG(LENGTH(`input`)), 0)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Input original text average character.'
                    },
                    {
                        'name': 'avg_output',
                        'select': (
                            'SELECT FORMAT(AVG(LENGTH(`output`)), 0)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Output translation text average character.'
                    },
                    {
                        'name': 'last_time',
                        'select': (
                            'SELECT MAX(`request_time`)\n'
                            f'{from_clause}'
                        ),
                        'comment': 'Last record request time.'
                    }
                ]
            }
        ]

        # Build.
        self.db.build.build(tables=tables, views_stats=views_stats, skip=True)


    __call__ = trans
436
+
437
+
def crawl_baidu_trans(text: str) -> str | None:
    """
    Crawl baidu translate text, from the suggestion interface of Baidu translate web page.

    Parameters
    ----------
    text : Text to be translated.

    Returns
    -------
    Translated text, or null when the interface returns no suggestion.
    """

    # Parameter.
    url = 'https://fanyi.baidu.com/sug'
    data = {
        'kw': text
    }

    # Request.
    response = reykit_request(url, data)
    response_data = response.json()['data']

    # Handle result.

    ## No suggestion.
    if not response_data:
        return None

    ## Keep the first meaning of the first suggestion.
    # Text before the first `;`, then text after the last `. `,
    # presumably to drop a part of speech prefix such as `n. ` (verify against real responses).
    translate_data = response_data[0]['v']
    translate_text = translate_data.split(';')[0].split('. ')[-1]

    return translate_text