scrapling 0.2.99__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. scrapling/__init__.py +18 -31
  2. scrapling/cli.py +818 -20
  3. scrapling/core/_html_utils.py +348 -0
  4. scrapling/core/_types.py +34 -17
  5. scrapling/core/ai.py +611 -0
  6. scrapling/core/custom_types.py +183 -100
  7. scrapling/core/mixins.py +27 -19
  8. scrapling/core/shell.py +647 -0
  9. scrapling/core/{storage_adaptors.py → storage.py} +41 -33
  10. scrapling/core/translator.py +20 -26
  11. scrapling/core/utils.py +49 -54
  12. scrapling/engines/__init__.py +15 -6
  13. scrapling/engines/_browsers/__init__.py +2 -0
  14. scrapling/engines/_browsers/_camoufox.py +759 -0
  15. scrapling/engines/_browsers/_config_tools.py +130 -0
  16. scrapling/engines/_browsers/_controllers.py +644 -0
  17. scrapling/engines/_browsers/_page.py +93 -0
  18. scrapling/engines/_browsers/_validators.py +170 -0
  19. scrapling/engines/constants.py +101 -88
  20. scrapling/engines/static.py +667 -110
  21. scrapling/engines/toolbelt/__init__.py +20 -6
  22. scrapling/engines/toolbelt/bypasses/playwright_fingerprint.js +2 -1
  23. scrapling/engines/toolbelt/convertor.py +254 -0
  24. scrapling/engines/toolbelt/custom.py +158 -175
  25. scrapling/engines/toolbelt/fingerprints.py +32 -46
  26. scrapling/engines/toolbelt/navigation.py +68 -39
  27. scrapling/fetchers.py +239 -333
  28. scrapling/parser.py +781 -449
  29. scrapling-0.3.1.dist-info/METADATA +411 -0
  30. scrapling-0.3.1.dist-info/RECORD +41 -0
  31. {scrapling-0.2.99.dist-info → scrapling-0.3.1.dist-info}/WHEEL +1 -1
  32. {scrapling-0.2.99.dist-info → scrapling-0.3.1.dist-info}/top_level.txt +0 -1
  33. scrapling/defaults.py +0 -25
  34. scrapling/engines/camo.py +0 -339
  35. scrapling/engines/pw.py +0 -465
  36. scrapling/engines/toolbelt/bypasses/pdf_viewer.js +0 -5
  37. scrapling-0.2.99.dist-info/METADATA +0 -290
  38. scrapling-0.2.99.dist-info/RECORD +0 -49
  39. tests/__init__.py +0 -1
  40. tests/fetchers/__init__.py +0 -1
  41. tests/fetchers/async/__init__.py +0 -0
  42. tests/fetchers/async/test_camoufox.py +0 -97
  43. tests/fetchers/async/test_httpx.py +0 -85
  44. tests/fetchers/async/test_playwright.py +0 -101
  45. tests/fetchers/sync/__init__.py +0 -0
  46. tests/fetchers/sync/test_camoufox.py +0 -70
  47. tests/fetchers/sync/test_httpx.py +0 -84
  48. tests/fetchers/sync/test_playwright.py +0 -89
  49. tests/fetchers/test_utils.py +0 -97
  50. tests/parser/__init__.py +0 -0
  51. tests/parser/test_automatch.py +0 -111
  52. tests/parser/test_general.py +0 -330
  53. {scrapling-0.2.99.dist-info → scrapling-0.3.1.dist-info}/entry_points.txt +0 -0
  54. {scrapling-0.2.99.dist-info → scrapling-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,19 +1,29 @@
1
1
  """
2
2
  Functions related to custom types or type checking
3
3
  """
4
- import inspect
4
+
5
5
  from email.message import Message
6
6
 
7
- from scrapling.core._types import (Any, Callable, Dict, List, Optional, Tuple,
8
- Type, Union)
7
+ from scrapling.core._types import (
8
+ Any,
9
+ Dict,
10
+ List,
11
+ Optional,
12
+ Tuple,
13
+ )
9
14
  from scrapling.core.custom_types import MappingProxyType
10
15
  from scrapling.core.utils import log, lru_cache
11
- from scrapling.parser import Adaptor, SQLiteStorageSystem
16
+ from scrapling.parser import Selector, SQLiteStorageSystem
12
17
 
13
18
 
14
19
  class ResponseEncoding:
15
20
  __DEFAULT_ENCODING = "utf-8"
16
- __ISO_8859_1_CONTENT_TYPES = {"text/plain", "text/html", "text/css", "text/javascript"}
21
+ __ISO_8859_1_CONTENT_TYPES = {
22
+ "text/plain",
23
+ "text/html",
24
+ "text/css",
25
+ "text/javascript",
26
+ }
17
27
 
18
28
  @classmethod
19
29
  @lru_cache(maxsize=128)
@@ -27,19 +37,21 @@ class ResponseEncoding:
27
37
  """
28
38
  # Create a Message object and set the Content-Type header then get the content type and parameters
29
39
  msg = Message()
30
- msg['content-type'] = header_value
40
+ msg["content-type"] = header_value
31
41
 
32
42
  content_type = msg.get_content_type()
33
43
  params = dict(msg.get_params(failobj=[]))
34
44
 
35
45
  # Remove the content-type from params if present somehow
36
- params.pop('content-type', None)
46
+ params.pop("content-type", None)
37
47
 
38
48
  return content_type, params
39
49
 
40
50
  @classmethod
41
51
  @lru_cache(maxsize=128)
42
- def get_value(cls, content_type: Optional[str], text: Optional[str] = 'test') -> str:
52
+ def get_value(
53
+ cls, content_type: Optional[str], text: Optional[str] = "test"
54
+ ) -> str:
43
55
  """Determine the appropriate character encoding from a content-type header.
44
56
 
45
57
  The encoding is determined by these rules in order:
@@ -72,7 +84,9 @@ class ResponseEncoding:
72
84
  encoding = cls.__DEFAULT_ENCODING
73
85
 
74
86
  if encoding:
75
- _ = text.encode(encoding) # Validate encoding and validate it can encode the given text
87
+ _ = text.encode(
88
+ encoding
89
+ ) # Validate encoding and validate it can encode the given text
76
90
  return encoding
77
91
 
78
92
  return cls.__DEFAULT_ENCODING
@@ -81,48 +95,74 @@ class ResponseEncoding:
81
95
  return cls.__DEFAULT_ENCODING
82
96
 
83
97
 
84
- class Response(Adaptor):
98
+ class Response(Selector):
85
99
  """This class is returned by all engines as a way to unify response type between different libraries."""
86
100
 
87
- def __init__(self, url: str, text: str, body: bytes, status: int, reason: str, cookies: Dict, headers: Dict, request_headers: Dict,
88
- encoding: str = 'utf-8', method: str = 'GET', history: List = None, **adaptor_arguments: Dict):
89
- automatch_domain = adaptor_arguments.pop('automatch_domain', None)
101
+ def __init__(
102
+ self,
103
+ url: str,
104
+ content: str | bytes,
105
+ status: int,
106
+ reason: str,
107
+ cookies: Tuple[Dict[str, str], ...] | Dict[str, str],
108
+ headers: Dict,
109
+ request_headers: Dict,
110
+ encoding: str = "utf-8",
111
+ method: str = "GET",
112
+ history: List = None,
113
+ **selector_config: Dict,
114
+ ):
115
+ adaptive_domain = selector_config.pop("adaptive_domain", None)
90
116
  self.status = status
91
117
  self.reason = reason
92
118
  self.cookies = cookies
93
119
  self.headers = headers
94
120
  self.request_headers = request_headers
95
121
  self.history = history or []
96
- encoding = ResponseEncoding.get_value(encoding, text)
97
- super().__init__(text=text, body=body, url=automatch_domain or url, encoding=encoding, **adaptor_arguments)
98
- # For back-ward compatibility
99
- self.adaptor = self
122
+ encoding = ResponseEncoding.get_value(
123
+ encoding, content.decode("utf-8") if isinstance(content, bytes) else content
124
+ )
125
+ super().__init__(
126
+ content=content,
127
+ url=adaptive_domain or url,
128
+ encoding=encoding,
129
+ **selector_config,
130
+ )
100
131
  # For easier debugging while working from a Python shell
101
- log.info(f'Fetched ({status}) <{method} {url}> (referer: {request_headers.get("referer")})')
102
-
103
- # def __repr__(self):
104
- # return f'<{self.__class__.__name__} [{self.status} {self.reason}]>'
132
+ log.info(
133
+ f"Fetched ({status}) <{method} {url}> (referer: {request_headers.get('referer')})"
134
+ )
105
135
 
106
136
 
107
137
  class BaseFetcher:
108
138
  __slots__ = ()
109
139
  huge_tree: bool = True
110
- auto_match: Optional[bool] = False
140
+ adaptive: Optional[bool] = False
111
141
  storage: Any = SQLiteStorageSystem
112
142
  keep_cdata: Optional[bool] = False
113
143
  storage_args: Optional[Dict] = None
114
144
  keep_comments: Optional[bool] = False
115
- automatch_domain: Optional[str] = None
116
- parser_keywords: Tuple = ('huge_tree', 'auto_match', 'storage', 'keep_cdata', 'storage_args', 'keep_comments', 'automatch_domain',) # Left open for the user
145
+ adaptive_domain: Optional[str] = None
146
+ parser_keywords: Tuple = (
147
+ "huge_tree",
148
+ "adaptive",
149
+ "storage",
150
+ "keep_cdata",
151
+ "storage_args",
152
+ "keep_comments",
153
+ "adaptive_domain",
154
+ ) # Left open for the user
117
155
 
118
156
  def __init__(self, *args, **kwargs):
119
157
  # For backward-compatibility before 0.2.99
120
- args_str = ", ".join(args) or ''
121
- kwargs_str = ", ".join(f'{k}={v}' for k, v in kwargs.items()) or ''
158
+ args_str = ", ".join(args) or ""
159
+ kwargs_str = ", ".join(f"{k}={v}" for k, v in kwargs.items()) or ""
122
160
  if args_str:
123
- args_str += ', '
161
+ args_str += ", "
124
162
 
125
- log.warning(f'This logic is deprecated now, and have no effect; It will be removed with v0.3. Use `{self.__class__.__name__}.configure({args_str}{kwargs_str})` instead before fetching')
163
+ log.warning(
164
+ f"This logic is deprecated now, and have no effect; It will be removed with v0.3. Use `{self.__class__.__name__}.configure({args_str}{kwargs_str})` instead before fetching"
165
+ )
126
166
  pass
127
167
 
128
168
  @classmethod
@@ -131,17 +171,17 @@ class BaseFetcher:
131
171
  huge_tree=cls.huge_tree,
132
172
  keep_comments=cls.keep_comments,
133
173
  keep_cdata=cls.keep_cdata,
134
- auto_match=cls.auto_match,
174
+ adaptive=cls.adaptive,
135
175
  storage=cls.storage,
136
176
  storage_args=cls.storage_args,
137
- automatch_domain=cls.automatch_domain,
177
+ adaptive_domain=cls.adaptive_domain,
138
178
  )
139
179
 
140
180
  @classmethod
141
181
  def configure(cls, **kwargs):
142
182
  """Set multiple arguments for the parser at once globally
143
183
 
144
- :param kwargs: The keywords can be any arguments of the following: huge_tree, keep_comments, keep_cdata, auto_match, storage, storage_args, automatch_domain
184
+ :param kwargs: The keywords can be any arguments of the following: huge_tree, keep_comments, keep_cdata, adaptive, storage, storage_args, adaptive_domain
145
185
  """
146
186
  for key, value in kwargs.items():
147
187
  key = key.strip().lower()
@@ -150,30 +190,38 @@ class BaseFetcher:
150
190
  setattr(cls, key, value)
151
191
  else:
152
192
  # Yup, no fun allowed LOL
153
- raise AttributeError(f'Unknown parser argument: "{key}"; maybe you meant {cls.parser_keywords}?')
193
+ raise AttributeError(
194
+ f'Unknown parser argument: "{key}"; maybe you meant {cls.parser_keywords}?'
195
+ )
154
196
  else:
155
- raise ValueError(f'Unknown parser argument: "{key}"; maybe you meant {cls.parser_keywords}?')
197
+ raise ValueError(
198
+ f'Unknown parser argument: "{key}"; maybe you meant {cls.parser_keywords}?'
199
+ )
156
200
 
157
201
  if not kwargs:
158
- raise AttributeError(f'You must pass a keyword to configure, current keywords: {cls.parser_keywords}?')
202
+ raise AttributeError(
203
+ f"You must pass a keyword to configure, current keywords: {cls.parser_keywords}?"
204
+ )
159
205
 
160
206
  @classmethod
161
207
  def _generate_parser_arguments(cls) -> Dict:
162
- # Adaptor class parameters
163
- # I won't validate Adaptor's class parameters here again, I will leave it to be validated later
208
+ # Selector class parameters
209
+ # I won't validate Selector's class parameters here again, I will leave it to be validated later
164
210
  parser_arguments = dict(
165
211
  huge_tree=cls.huge_tree,
166
212
  keep_comments=cls.keep_comments,
167
213
  keep_cdata=cls.keep_cdata,
168
- auto_match=cls.auto_match,
214
+ adaptive=cls.adaptive,
169
215
  storage=cls.storage,
170
- storage_args=cls.storage_args
216
+ storage_args=cls.storage_args,
171
217
  )
172
- if cls.automatch_domain:
173
- if type(cls.automatch_domain) is not str:
174
- log.warning('[Ignored] The argument "automatch_domain" must be of string type')
218
+ if cls.adaptive_domain:
219
+ if not isinstance(cls.adaptive_domain, str):
220
+ log.warning(
221
+ '[Ignored] The argument "adaptive_domain" must be of string type'
222
+ )
175
223
  else:
176
- parser_arguments.update({'automatch_domain': cls.automatch_domain})
224
+ parser_arguments.update({"adaptive_domain": cls.adaptive_domain})
177
225
 
178
226
  return parser_arguments
179
227
 
@@ -181,72 +229,75 @@ class BaseFetcher:
181
229
  class StatusText:
182
230
  """A class that gets the status text of response status code.
183
231
 
184
- Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status
232
+ Reference: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status
185
233
  """
186
- _phrases = MappingProxyType({
187
- 100: "Continue",
188
- 101: "Switching Protocols",
189
- 102: "Processing",
190
- 103: "Early Hints",
191
- 200: "OK",
192
- 201: "Created",
193
- 202: "Accepted",
194
- 203: "Non-Authoritative Information",
195
- 204: "No Content",
196
- 205: "Reset Content",
197
- 206: "Partial Content",
198
- 207: "Multi-Status",
199
- 208: "Already Reported",
200
- 226: "IM Used",
201
- 300: "Multiple Choices",
202
- 301: "Moved Permanently",
203
- 302: "Found",
204
- 303: "See Other",
205
- 304: "Not Modified",
206
- 305: "Use Proxy",
207
- 307: "Temporary Redirect",
208
- 308: "Permanent Redirect",
209
- 400: "Bad Request",
210
- 401: "Unauthorized",
211
- 402: "Payment Required",
212
- 403: "Forbidden",
213
- 404: "Not Found",
214
- 405: "Method Not Allowed",
215
- 406: "Not Acceptable",
216
- 407: "Proxy Authentication Required",
217
- 408: "Request Timeout",
218
- 409: "Conflict",
219
- 410: "Gone",
220
- 411: "Length Required",
221
- 412: "Precondition Failed",
222
- 413: "Payload Too Large",
223
- 414: "URI Too Long",
224
- 415: "Unsupported Media Type",
225
- 416: "Range Not Satisfiable",
226
- 417: "Expectation Failed",
227
- 418: "I'm a teapot",
228
- 421: "Misdirected Request",
229
- 422: "Unprocessable Entity",
230
- 423: "Locked",
231
- 424: "Failed Dependency",
232
- 425: "Too Early",
233
- 426: "Upgrade Required",
234
- 428: "Precondition Required",
235
- 429: "Too Many Requests",
236
- 431: "Request Header Fields Too Large",
237
- 451: "Unavailable For Legal Reasons",
238
- 500: "Internal Server Error",
239
- 501: "Not Implemented",
240
- 502: "Bad Gateway",
241
- 503: "Service Unavailable",
242
- 504: "Gateway Timeout",
243
- 505: "HTTP Version Not Supported",
244
- 506: "Variant Also Negotiates",
245
- 507: "Insufficient Storage",
246
- 508: "Loop Detected",
247
- 510: "Not Extended",
248
- 511: "Network Authentication Required"
249
- })
234
+
235
+ _phrases = MappingProxyType(
236
+ {
237
+ 100: "Continue",
238
+ 101: "Switching Protocols",
239
+ 102: "Processing",
240
+ 103: "Early Hints",
241
+ 200: "OK",
242
+ 201: "Created",
243
+ 202: "Accepted",
244
+ 203: "Non-Authoritative Information",
245
+ 204: "No Content",
246
+ 205: "Reset Content",
247
+ 206: "Partial Content",
248
+ 207: "Multi-Status",
249
+ 208: "Already Reported",
250
+ 226: "IM Used",
251
+ 300: "Multiple Choices",
252
+ 301: "Moved Permanently",
253
+ 302: "Found",
254
+ 303: "See Other",
255
+ 304: "Not Modified",
256
+ 305: "Use Proxy",
257
+ 307: "Temporary Redirect",
258
+ 308: "Permanent Redirect",
259
+ 400: "Bad Request",
260
+ 401: "Unauthorized",
261
+ 402: "Payment Required",
262
+ 403: "Forbidden",
263
+ 404: "Not Found",
264
+ 405: "Method Not Allowed",
265
+ 406: "Not Acceptable",
266
+ 407: "Proxy Authentication Required",
267
+ 408: "Request Timeout",
268
+ 409: "Conflict",
269
+ 410: "Gone",
270
+ 411: "Length Required",
271
+ 412: "Precondition Failed",
272
+ 413: "Payload Too Large",
273
+ 414: "URI Too Long",
274
+ 415: "Unsupported Media Type",
275
+ 416: "Range Not Satisfiable",
276
+ 417: "Expectation Failed",
277
+ 418: "I'm a teapot",
278
+ 421: "Misdirected Request",
279
+ 422: "Unprocessable Entity",
280
+ 423: "Locked",
281
+ 424: "Failed Dependency",
282
+ 425: "Too Early",
283
+ 426: "Upgrade Required",
284
+ 428: "Precondition Required",
285
+ 429: "Too Many Requests",
286
+ 431: "Request Header Fields Too Large",
287
+ 451: "Unavailable For Legal Reasons",
288
+ 500: "Internal Server Error",
289
+ 501: "Not Implemented",
290
+ 502: "Bad Gateway",
291
+ 503: "Service Unavailable",
292
+ 504: "Gateway Timeout",
293
+ 505: "HTTP Version Not Supported",
294
+ 506: "Variant Also Negotiates",
295
+ 507: "Insufficient Storage",
296
+ 508: "Loop Detected",
297
+ 510: "Not Extended",
298
+ 511: "Network Authentication Required",
299
+ }
300
+ )
250
301
 
251
302
  @classmethod
252
303
  @lru_cache(maxsize=128)
@@ -255,32 +306,6 @@ class StatusText:
255
306
  return cls._phrases.get(status_code, "Unknown Status Code")
256
307
 
257
308
 
258
- def check_if_engine_usable(engine: Callable) -> Union[Callable, None]:
259
- """This function check if the passed engine can be used by a Fetcher-type class or not.
260
-
261
- :param engine: The engine class itself
262
- :return: The engine class again if all checks out, otherwise raises error
263
- :raise TypeError: If engine class don't have fetch method, If engine class have fetch attribute not method, or If engine class have fetch function but it doesn't take arguments
264
- """
265
- # if isinstance(engine, type):
266
- # raise TypeError("Expected an engine instance, not a class definition of the engine")
267
-
268
- if hasattr(engine, 'fetch'):
269
- fetch_function = getattr(engine, "fetch")
270
- if callable(fetch_function):
271
- if len(inspect.signature(fetch_function).parameters) > 0:
272
- return engine
273
- else:
274
- # raise TypeError("Engine class instance must have a callable method 'fetch' with the first argument used for the url.")
275
- raise TypeError("Engine class must have a callable method 'fetch' with the first argument used for the url.")
276
- else:
277
- # raise TypeError("Invalid engine instance! Engine class must have a callable method 'fetch'")
278
- raise TypeError("Invalid engine class! Engine class must have a callable method 'fetch'")
279
- else:
280
- # raise TypeError("Invalid engine instance! Engine class must have the method 'fetch'")
281
- raise TypeError("Invalid engine class! Engine class must have the method 'fetch'")
282
-
283
-
284
309
  def get_variable_name(var: Any) -> Optional[str]:
285
310
  """Get the name of a variable using global and local scopes.
286
311
  :param var: The variable to find the name for
@@ -291,45 +316,3 @@ def get_variable_name(var: Any) -> Optional[str]:
291
316
  if value is var:
292
317
  return name
293
318
  return None
294
-
295
-
296
- def check_type_validity(variable: Any, valid_types: Union[List[Type], None], default_value: Any = None, critical: bool = False, param_name: Optional[str] = None) -> Any:
297
- """Check if a variable matches the specified type constraints.
298
- :param variable: The variable to check
299
- :param valid_types: List of valid types for the variable
300
- :param default_value: Value to return if type check fails
301
- :param critical: If True, raises TypeError instead of logging error
302
- :param param_name: Optional parameter name for error messages
303
- :return: The original variable if valid, default_value if invalid
304
- :raise TypeError: If critical=True and type check fails
305
- """
306
- # Use provided param_name or try to get it automatically
307
- var_name = param_name or get_variable_name(variable) or "Unknown"
308
-
309
- # Convert valid_types to a list if None
310
- valid_types = valid_types or []
311
-
312
- # Handle None value
313
- if variable is None:
314
- if type(None) in valid_types:
315
- return variable
316
- error_msg = f'Argument "{var_name}" cannot be None'
317
- if critical:
318
- raise TypeError(error_msg)
319
- log.error(f'[Ignored] {error_msg}')
320
- return default_value
321
-
322
- # If no valid_types specified and variable has a value, return it
323
- if not valid_types:
324
- return variable
325
-
326
- # Check if variable type matches any of the valid types
327
- if not any(isinstance(variable, t) for t in valid_types):
328
- type_names = [t.__name__ for t in valid_types]
329
- error_msg = f'Argument "{var_name}" must be of type {" or ".join(type_names)}'
330
- if critical:
331
- raise TypeError(error_msg)
332
- log.error(f'[Ignored] {error_msg}')
333
- return default_value
334
-
335
- return variable
@@ -2,19 +2,20 @@
2
2
  Functions related to generating headers and fingerprints generally
3
3
  """
4
4
 
5
- import platform
5
+ from platform import system as platform_system
6
6
 
7
- from browserforge.fingerprints import Fingerprint, FingerprintGenerator
8
- from browserforge.headers import Browser, HeaderGenerator
9
7
  from tldextract import extract
8
+ from browserforge.headers import Browser, HeaderGenerator
10
9
 
11
- from scrapling.core._types import Dict, Union
10
+ from scrapling.core._types import Dict, Optional
12
11
  from scrapling.core.utils import lru_cache
13
12
 
13
+ __OS_NAME__ = platform_system()
14
+
14
15
 
15
16
  @lru_cache(10, typed=True)
16
17
  def generate_convincing_referer(url: str) -> str:
17
- """Takes the domain from the URL without the subdomain/suffix and make it look like you were searching google for this website
18
+ """Takes the domain from the URL without the subdomain/suffix and make it look like you were searching Google for this website
18
19
 
19
20
  >>> generate_convincing_referer('https://www.somewebsite.com/blah')
20
21
  'https://www.google.com/search?q=somewebsite'
@@ -23,59 +24,44 @@ def generate_convincing_referer(url: str) -> str:
23
24
  :return: Google's search URL of the domain name
24
25
  """
25
26
  website_name = extract(url).domain
26
- return f'https://www.google.com/search?q={website_name}'
27
+ return f"https://www.google.com/search?q={website_name}"
27
28
 
28
29
 
29
30
  @lru_cache(1, typed=True)
30
- def get_os_name() -> Union[str, None]:
31
+ def get_os_name() -> Optional[str]:
31
32
  """Get the current OS name in the same format needed for browserforge
32
33
 
33
34
  :return: Current OS name or `None` otherwise
34
35
  """
35
- #
36
- os_name = platform.system()
37
36
  return {
38
- 'Linux': 'linux',
39
- 'Darwin': 'macos',
40
- 'Windows': 'windows',
41
- # For the future? because why not
42
- 'iOS': 'ios',
43
- }.get(os_name)
44
-
45
-
46
- def generate_suitable_fingerprint() -> Fingerprint:
47
- """Generates a browserforge's fingerprint that matches current OS, desktop device, and Chrome with version 128 at least.
48
-
49
- This function was originally created to test Browserforge's injector.
50
- :return: `Fingerprint` object
51
- """
52
- return FingerprintGenerator(
53
- browser=[Browser(name='chrome', min_version=128)],
54
- os=get_os_name(), # None is ignored
55
- device='desktop'
56
- ).generate()
37
+ "Linux": "linux",
38
+ "Darwin": "macos",
39
+ "Windows": "windows",
40
+ # For the future? because why not?
41
+ "iOS": "ios",
42
+ }.get(__OS_NAME__)
57
43
 
58
44
 
59
45
  def generate_headers(browser_mode: bool = False) -> Dict:
60
46
  """Generate real browser-like headers using browserforge's generator
61
47
 
62
- :param browser_mode: If enabled, the headers created are used for playwright so it have to match everything
48
+ :param browser_mode: If enabled, the headers created are used for playwright, so it has to match everything
63
49
  :return: A dictionary of the generated headers
64
50
  """
65
- if browser_mode:
66
- # In this mode we don't care about anything other than matching the OS and the browser type with the browser we are using
67
- # So we don't raise any inconsistency red flags while websites fingerprinting us
68
- os_name = get_os_name()
69
- return HeaderGenerator(
70
- browser=[Browser(name='chrome', min_version=130)],
71
- os=os_name, # None is ignored
72
- device='desktop'
73
- ).generate()
74
- else:
75
- # Here it's used for normal requests that aren't done through browsers so we can take it lightly
76
- browsers = [
77
- Browser(name='chrome', min_version=120),
78
- Browser(name='firefox', min_version=120),
79
- Browser(name='edge', min_version=120),
80
- ]
81
- return HeaderGenerator(browser=browsers, device='desktop').generate()
51
+ # In the browser mode, we don't care about anything other than matching the OS and the browser type with the browser we are using,
52
+ # So we don't raise any inconsistency red flags while websites fingerprinting us
53
+ os_name = get_os_name()
54
+ browsers = [Browser(name="chrome", min_version=130)]
55
+ if not browser_mode:
56
+ os_name = ("windows", "macos", "linux")
57
+ browsers.extend(
58
+ [
59
+ Browser(name="firefox", min_version=130),
60
+ Browser(name="edge", min_version=130),
61
+ ]
62
+ )
63
+
64
+ return HeaderGenerator(browser=browsers, os=os_name, device="desktop").generate()
65
+
66
+
67
+ __default_useragent__ = generate_headers(browser_mode=False).get("User-Agent")