reyfetch 1.0.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reyfetch/rbase.py ADDED
@@ -0,0 +1,243 @@
1
+ # !/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ @Time : 2023-12-29 23:14:18
6
+ @Author : Rey
7
+ @Contact : reyxbo@163.com
8
+ @Explain : Base methods.
9
+ """
10
+
11
+
12
+ from typing import Any, Literal
13
+ from types import MethodType
14
+ from threading import get_ident as threading_get_ident
15
+ from selenium.webdriver import Edge, Chrome, EdgeOptions, ChromeOptions
16
+ from reydb.rdb import Database
17
+ from reykit.rbase import Base
18
+ from reykit.rnet import join_url
19
+
20
+
21
# Public names exported by this module for `from ... import *`.
__all__ = (
    'FetchBase',
    'FetchRequest',
    'FetchCrawl',
    'FetchBrowser',
    'crawl_page',
    'FetchRequestWithDatabase',
    'FetchRequestDatabaseRecord'
)
30
+
31
+
32
class FetchBase(Base):
    """
    Fetch base type, the common parent of the fetch types in this module.
    """
36
+
37
+
38
class FetchRequest(FetchBase):
    """
    Fetch type that works by requesting an API.
    """
42
+
43
+
44
class FetchCrawl(FetchBase):
    """
    Fetch type that works by crawling the Web.
    """
48
+
49
+
50
+ class FetchBrowser(FetchBase):
51
+ """
52
+ Control browser fetch type.
53
+ """
54
+
55
+
56
+ def __init__(
57
+ self,
58
+ driver: Literal['edge', 'chrome'] = 'edge',
59
+ headless: bool = False
60
+ ) -> None:
61
+ """
62
+ Build instance attributes.
63
+
64
+ Parameters
65
+ ----------
66
+ driver : Browser driver type.
67
+ - `Literal['edge']`: Edge browser.
68
+ - `Literal['chrome']`: Chrome browser.
69
+ headless : Whether use headless mode.
70
+ """
71
+
72
+ # Parameter.
73
+ match driver:
74
+ case 'edge':
75
+ driver_type = Edge
76
+ driver_option_type = EdgeOptions
77
+ case 'chrome':
78
+ driver_type = Chrome
79
+ driver_option_type = ChromeOptions
80
+
81
+ # Option.
82
+ options = driver_option_type()
83
+
84
+ ## Headless.
85
+ if headless:
86
+ options.add_argument('--headless')
87
+
88
+ # Driver.
89
+ self.driver = driver_type(options)
90
+
91
+
92
+ def request(
93
+ self,
94
+ url: str,
95
+ params: dict[str, Any] | None = None
96
+ ) -> None:
97
+ """
98
+ Request URL.
99
+
100
+ Parameters
101
+ ----------
102
+ url : URL.
103
+ params : URL parameters.
104
+ """
105
+
106
+ # Parameter.
107
+ params = params or {}
108
+ url = join_url(url, params)
109
+
110
+ # Request.
111
+ self.driver.get(url)
112
+
113
+
114
+ @property
115
+ def page(self) -> str:
116
+ """
117
+ Return page elements document.
118
+
119
+ Returns
120
+ -------
121
+ Page elements document.
122
+ """
123
+
124
+ # Parameter.
125
+ page_source = self.driver.page_source
126
+
127
+ return page_source
128
+
129
+
130
+ __call__ = request
131
+
132
+
133
def crawl_page(
    url: str,
    params: dict[str, Any] | None = None
) -> str:
    """
    Crawl page elements document, using a temporary headless browser.

    Parameters
    ----------
    url : URL.
    params : URL parameters.

    Returns
    -------
    Page elements document.
    """

    # Parameter.
    browser = FetchBrowser(headless=True)

    try:

        # Request.
        browser.request(url, params)

        # Page.
        page = browser.page

    # Close.
    ## Always quit, otherwise every call leaves a browser session running.
    finally:
        browser.driver.quit()

    return page
160
+
161
+
162
class FetchRequestWithDatabase(FetchRequest):
    """
    Request API fetch type with database method.
    The database can be created with the `self.build_db` method.
    """

    # Attributes declared here by annotation only, no values are assigned in this class.
    db: Database | None
    db_names: dict[str, str]
    build_db: MethodType
171
+
172
+
173
class FetchRequestDatabaseRecord(FetchRequest):
    """
    Request API fetch type of record into the database, can multi threaded.
    Pending record data is kept separately for each thread, keyed by thread identifier.
    """


    def __init__(
        self,
        api: FetchRequestWithDatabase | None = None,
        database: str | None = None,
        table: str | None = None
    ) -> None:
        """
        Build instance attributes.

        Parameters
        ----------
        api : `API` instance.
            - `None`: Not record.
        database : Index `API.db_names` database name.
        table : Index `API.db_names` table name.
        """

        # Build.
        self.api = api

        ## Stored only, the methods of this class do not read it.
        self.database = database
        self.table = table

        ## Maps thread identifier to the pending record data of that thread.
        self.data: dict[int, dict[str, Any]] = {}


    def __setitem__(self, key: str, value: Any) -> None:
        """
        Update record data parameter of the current thread.
        Does nothing when there is no `API` instance or it has no database.

        Parameters
        ----------
        key : Parameter key.
        value : Parameter value.
        """

        # Check.
        ## `api` may be `None`, which is documented as not record.
        if (
            self.api is None
            or self.api.db is None
        ):
            return

        # Parameter.
        thread_id = threading_get_ident()
        record = self.data.setdefault(thread_id, {})

        # Update.
        record[key] = value


    def record(self) -> None:
        """
        Insert record of the current thread to table of database, then clear it.
        Does nothing when there is no `API` instance or it has no database.
        """

        # Check.
        ## `api` may be `None`, which is documented as not record.
        if (
            self.api is None
            or self.api.db is None
        ):
            return

        # Parameter.
        thread_id = threading_get_ident()
        record = self.data.setdefault(thread_id, {})
        table = self.api.db_names[self.table]

        # Insert.
        self.api.db.execute.insert(table, record)

        # Delete.
        ## Only reached after the insert returned, so a failed insert keeps the data.
        del self.data[thread_id]