scrapling 0.1.2__py3-none-any.whl → 0.2.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. scrapling/__init__.py +4 -3
  2. scrapling/core/__init__.py +0 -0
  3. scrapling/core/_types.py +25 -0
  4. scrapling/{custom_types.py → core/custom_types.py} +48 -3
  5. scrapling/{mixins.py → core/mixins.py} +22 -7
  6. scrapling/{storage_adaptors.py → core/storage_adaptors.py} +2 -2
  7. scrapling/{translator.py → core/translator.py} +2 -12
  8. scrapling/{utils.py → core/utils.py} +14 -61
  9. scrapling/engines/__init__.py +7 -0
  10. scrapling/engines/camo.py +128 -0
  11. scrapling/engines/constants.py +108 -0
  12. scrapling/engines/pw.py +237 -0
  13. scrapling/engines/static.py +112 -0
  14. scrapling/engines/toolbelt/__init__.py +19 -0
  15. scrapling/engines/toolbelt/custom.py +154 -0
  16. scrapling/engines/toolbelt/fingerprints.py +81 -0
  17. scrapling/engines/toolbelt/navigation.py +108 -0
  18. scrapling/fetchers.py +198 -0
  19. scrapling/parser.py +223 -70
  20. scrapling/py.typed +1 -0
  21. scrapling-0.2.1.dist-info/METADATA +835 -0
  22. scrapling-0.2.1.dist-info/RECORD +33 -0
  23. {scrapling-0.1.2.dist-info → scrapling-0.2.1.dist-info}/WHEEL +1 -1
  24. {scrapling-0.1.2.dist-info → scrapling-0.2.1.dist-info}/top_level.txt +1 -0
  25. tests/__init__.py +1 -0
  26. tests/fetchers/__init__.py +1 -0
  27. tests/fetchers/test_camoufox.py +62 -0
  28. tests/fetchers/test_httpx.py +67 -0
  29. tests/fetchers/test_playwright.py +74 -0
  30. tests/parser/__init__.py +0 -0
  31. tests/parser/test_automatch.py +56 -0
  32. tests/parser/test_general.py +286 -0
  33. scrapling-0.1.2.dist-info/METADATA +0 -477
  34. scrapling-0.1.2.dist-info/RECORD +0 -12
  35. {scrapling-0.1.2.dist-info → scrapling-0.2.1.dist-info}/LICENSE +0 -0
scrapling/__init__.py CHANGED
@@ -1,10 +1,11 @@
1
1
  # Declare top-level shortcuts
2
+ from scrapling.fetchers import Fetcher, StealthyFetcher, PlayWrightFetcher, CustomFetcher
2
3
  from scrapling.parser import Adaptor, Adaptors
3
- from scrapling.custom_types import TextHandler, AttributesHandler
4
+ from scrapling.core.custom_types import TextHandler, AttributesHandler
4
5
 
5
6
  __author__ = "Karim Shoair (karim.shoair@pm.me)"
6
- __version__ = "0.1.2"
7
+ __version__ = "0.2.1"
7
8
  __copyright__ = "Copyright (c) 2024 Karim Shoair"
8
9
 
9
10
 
10
- __all__ = ['Adaptor', 'Adaptors', 'TextHandler', 'AttributesHandler']
11
+ __all__ = ['Adaptor', 'Fetcher', 'StealthyFetcher', 'PlayWrightFetcher']
File without changes
@@ -0,0 +1,25 @@
1
+ """
2
+ Type definitions for type checking purposes.
3
+ """
4
+
5
+ from typing import (
6
+ Dict, Optional, Union, Callable, Any, List, Tuple, Pattern, Generator, Iterable, Type, TYPE_CHECKING, Literal
7
+ )
8
+
9
+ try:
10
+ from typing import Protocol
11
+ except ImportError:
12
+ # Added in Python 3.8
13
+ Protocol = object
14
+
15
+ try:
16
+ from typing import SupportsIndex
17
+ except ImportError:
18
+ # 'SupportsIndex' got added in Python 3.8
19
+ SupportsIndex = None
20
+
21
+ if TYPE_CHECKING:
22
+ # typing.Self requires Python 3.11
23
+ from typing_extensions import Self
24
+ else:
25
+ Self = object
@@ -1,9 +1,9 @@
1
1
  import re
2
2
  from types import MappingProxyType
3
3
  from collections.abc import Mapping
4
- from typing import Dict, List, Union, Pattern
5
4
 
6
- from scrapling.utils import _is_iterable, flatten
5
+ from scrapling.core.utils import _is_iterable, flatten
6
+ from scrapling.core._types import Dict, List, Union, Pattern, SupportsIndex
7
7
 
8
8
  from orjson import loads, dumps
9
9
  from w3lib.html import replace_entities as _replace_entities
@@ -69,7 +69,7 @@ class TextHandler(str):
69
69
  return [TextHandler(_replace_entities(s)) for s in results]
70
70
 
71
71
  def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
72
- clean_match: bool = False, case_sensitive: bool = False,):
72
+ clean_match: bool = False, case_sensitive: bool = False) -> Union[str, None]:
73
73
  """Apply the given regex to text and return the first match if found, otherwise return the default value.
74
74
 
75
75
  :param regex: Can be either a compiled regular expression or a string.
@@ -83,6 +83,51 @@ class TextHandler(str):
83
83
  return result[0] if result else default
84
84
 
85
85
 
86
+ class TextHandlers(List[TextHandler]):
87
+ """
88
+ The :class:`TextHandlers` class is a subclass of the builtin ``List`` class, which provides a few additional methods.
89
+ """
90
+ __slots__ = ()
91
+
92
+ def __getitem__(self, pos: Union[SupportsIndex, slice]) -> Union[TextHandler, "TextHandlers[TextHandler]"]:
93
+ lst = super().__getitem__(pos)
94
+ if isinstance(pos, slice):
95
+ return self.__class__(lst)
96
+ else:
97
+ return lst
98
+
99
+ def re(self, regex: Union[str, Pattern[str]], replace_entities: bool = True, clean_match: bool = False,
100
+ case_sensitive: bool = False) -> 'List[str]':
101
+ """Call the ``.re()`` method for each element in this list and return
102
+ their results flattened as TextHandlers.
103
+
104
+ :param regex: Can be either a compiled regular expression or a string.
105
+ :param replace_entities: if enabled character entity references are replaced by their corresponding character
106
+ :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
107
+ :param case_sensitive: if enabled, function will set the regex to ignore letters case while compiling it
108
+ """
109
+ results = [
110
+ n.re(regex, replace_entities, clean_match, case_sensitive) for n in self
111
+ ]
112
+ return flatten(results)
113
+
114
+ def re_first(self, regex: Union[str, Pattern[str]], default=None, replace_entities: bool = True,
115
+ clean_match: bool = False, case_sensitive: bool = False) -> Union[str, None]:
116
+ """Call the ``.re_first()`` method for each element in this list and return
117
+ the first result or the default value otherwise.
118
+
119
+ :param regex: Can be either a compiled regular expression or a string.
120
+ :param default: The default value to be returned if there is no match
121
+ :param replace_entities: if enabled character entity references are replaced by their corresponding character
122
+ :param clean_match: if enabled, this will ignore all whitespaces and consecutive spaces while matching
123
+ :param case_sensitive: if enabled, function will set the regex to ignore letters case while compiling it
124
+ """
125
+ for n in self:
126
+ for result in n.re(regex, replace_entities, clean_match, case_sensitive):
127
+ return result
128
+ return default
129
+
130
+
86
131
  class AttributesHandler(Mapping):
87
132
  """A read-only mapping to use instead of the standard dictionary for the speed boost but
88
133
  at the same time I use it to add more functionalities.
@@ -4,7 +4,7 @@ class SelectorsGeneration:
4
4
  Trying to generate selectors like Firefox or maybe cleaner ones!? Ehm
5
5
  Inspiration: https://searchfox.org/mozilla-central/source/devtools/shared/inspector/css-logic.js#591"""
6
6
 
7
- def __general_selection(self, selection: str = 'css') -> str:
7
+ def __general_selection(self, selection: str = 'css', full_path=False) -> str:
8
8
  """Generate a selector for the current element.
9
9
  :return: A string of the generated selector.
10
10
  """
@@ -20,10 +20,11 @@ class SelectorsGeneration:
20
20
  else f"[@id='{target.attrib['id']}']"
21
21
  )
22
22
  selectorPath.append(part)
23
- return (
24
- " > ".join(reversed(selectorPath)) if css
25
- else '//*' + "/".join(reversed(selectorPath))
26
- )
23
+ if not full_path:
24
+ return (
25
+ " > ".join(reversed(selectorPath)) if css
26
+ else '//*' + "/".join(reversed(selectorPath))
27
+ )
27
28
  else:
28
29
  part = f'{target.tag}'
29
30
  # We won't use classes anymore because some websites share exact classes between elements
@@ -60,15 +61,29 @@ class SelectorsGeneration:
60
61
  )
61
62
 
62
63
  @property
63
- def css_selector(self) -> str:
64
+ def generate_css_selector(self) -> str:
64
65
  """Generate a CSS selector for the current element
65
66
  :return: A string of the generated selector.
66
67
  """
67
68
  return self.__general_selection()
68
69
 
69
70
  @property
70
- def xpath_selector(self) -> str:
71
+ def generate_full_css_selector(self) -> str:
72
+ """Generate a complete CSS selector for the current element
73
+ :return: A string of the generated selector.
74
+ """
75
+ return self.__general_selection(full_path=True)
76
+
77
+ @property
78
+ def generate_xpath_selector(self) -> str:
71
79
  """Generate a XPath selector for the current element
72
80
  :return: A string of the generated selector.
73
81
  """
74
82
  return self.__general_selection('xpath')
83
+
84
+ @property
85
+ def generate_full_xpath_selector(self) -> str:
86
+ """Generate a complete XPath selector for the current element
87
+ :return: A string of the generated selector.
88
+ """
89
+ return self.__general_selection('xpath', full_path=True)
@@ -4,9 +4,9 @@ import logging
4
4
  import threading
5
5
  from hashlib import sha256
6
6
  from abc import ABC, abstractmethod
7
- from typing import Dict, Optional, Union
8
7
 
9
- from scrapling.utils import _StorageTools, cache
8
+ from scrapling.core._types import Dict, Optional, Union
9
+ from scrapling.core.utils import _StorageTools, cache
10
10
 
11
11
  from lxml import html
12
12
  from tldextract import extract as tld
@@ -9,24 +9,14 @@ which will be important in future releases but most importantly...
9
9
  import re
10
10
 
11
11
  from w3lib.html import HTML5_WHITESPACE
12
- from typing import TYPE_CHECKING, Any, Optional
13
- try:
14
- from typing import Protocol
15
- except ImportError:
16
- # Added in Python 3.8
17
- Protocol = object
18
-
19
- from scrapling.utils import cache
12
+ from scrapling.core.utils import cache
13
+ from scrapling.core._types import Any, Optional, Protocol, Self
20
14
 
21
15
  from cssselect.xpath import ExpressionError
22
16
  from cssselect.xpath import XPathExpr as OriginalXPathExpr
23
17
  from cssselect import HTMLTranslator as OriginalHTMLTranslator
24
18
  from cssselect.parser import Element, FunctionalPseudoElement, PseudoElement
25
19
 
26
- if TYPE_CHECKING:
27
- # typing.Self requires Python 3.11
28
- from typing_extensions import Self
29
-
30
20
 
31
21
  regex = f"[{HTML5_WHITESPACE}]+"
32
22
  replace_html5_whitespaces = re.compile(regex).sub
@@ -1,14 +1,14 @@
1
1
  import re
2
- import os
3
2
  import logging
4
3
  from itertools import chain
5
- from logging import handlers
6
4
  # Using cache on top of a class is a brilliant way to achieve the Singleton design pattern without much code
7
5
  from functools import lru_cache as cache # functools.cache is available on Python 3.9+ only so let's keep lru_cache
8
6
 
9
- from typing import Dict, Iterable, Any
7
+ from scrapling.core._types import Dict, Iterable, Any, Union
10
8
 
9
+ import orjson
11
10
  from lxml import html
11
+
12
12
  html_forbidden = {html.HtmlComment, }
13
13
  logging.basicConfig(
14
14
  level=logging.ERROR,
@@ -19,6 +19,17 @@ logging.basicConfig(
19
19
  )
20
20
 
21
21
 
22
+ def is_jsonable(content: Union[bytes, str]) -> bool:
23
+ if type(content) is bytes:
24
+ content = content.decode()
25
+
26
+ try:
27
+ _ = orjson.loads(content)
28
+ return True
29
+ except orjson.JSONDecodeError:
30
+ return False
31
+
32
+
22
33
  @cache(None, typed=True)
23
34
  def setup_basic_logging(level: str = 'debug'):
24
35
  levels = {
@@ -45,64 +56,6 @@ def _is_iterable(s: Any):
45
56
  return isinstance(s, (list, tuple,))
46
57
 
47
58
 
48
- @cache(None, typed=True)
49
- class _Logger(object):
50
- # I will leave this class here for now in case I decide I want to come back to use it :)
51
- __slots__ = ('console_logger', 'logger_file_path',)
52
- levels = {
53
- 'debug': logging.DEBUG,
54
- 'info': logging.INFO,
55
- 'warning': logging.WARNING,
56
- 'error': logging.ERROR,
57
- 'critical': logging.CRITICAL
58
- }
59
-
60
- def __init__(self, filename: str = 'debug.log', level: str = 'debug', when: str = 'midnight', backcount: int = 1):
61
- os.makedirs(os.path.join(os.path.dirname(__file__), 'logs'), exist_ok=True)
62
- format_str = logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S")
63
-
64
- # on-screen output
65
- lvl = self.levels[level.lower()]
66
- self.console_logger = logging.getLogger('Scrapling')
67
- self.console_logger.setLevel(lvl)
68
- console_handler = logging.StreamHandler()
69
- console_handler.setLevel(lvl)
70
- console_handler.setFormatter(format_str)
71
- self.console_logger.addHandler(console_handler)
72
-
73
- if lvl == logging.DEBUG:
74
- filename = os.path.join(os.path.dirname(__file__), 'logs', filename)
75
- self.logger_file_path = filename
76
- # Automatically generates the logging file at specified intervals
77
- file_handler = handlers.TimedRotatingFileHandler(
78
- # If more than (backcount+1) existed, oldest logs will be deleted
79
- filename=filename, when=when, backupCount=backcount, encoding='utf-8'
80
- )
81
- file_handler.setLevel(lvl)
82
- file_handler.setFormatter(format_str)
83
- # This for the logger when it appends the date to the new log
84
- file_handler.namer = lambda name: name.replace(".log", "") + ".log"
85
- self.console_logger.addHandler(file_handler)
86
- self.debug(f'Debug log path: {self.logger_file_path}')
87
- else:
88
- self.logger_file_path = None
89
-
90
- def debug(self, message: str) -> None:
91
- self.console_logger.debug(message)
92
-
93
- def info(self, message: str) -> None:
94
- self.console_logger.info(message)
95
-
96
- def warning(self, message: str) -> None:
97
- self.console_logger.warning(message)
98
-
99
- def error(self, message: str) -> None:
100
- self.console_logger.error(message)
101
-
102
- def critical(self, message: str) -> None:
103
- self.console_logger.critical(message)
104
-
105
-
106
59
  class _StorageTools:
107
60
  @staticmethod
108
61
  def __clean_attributes(element: html.HtmlElement, forbidden: tuple = ()) -> Dict:
@@ -0,0 +1,7 @@
1
+ from .camo import CamoufoxEngine
2
+ from .static import StaticEngine
3
+ from .pw import PlaywrightEngine
4
+ from .constants import DEFAULT_DISABLED_RESOURCES, DEFAULT_STEALTH_FLAGS
5
+ from .toolbelt import check_if_engine_usable
6
+
7
+ __all__ = ['CamoufoxEngine', 'PlaywrightEngine']
@@ -0,0 +1,128 @@
1
+ import logging
2
+ from scrapling.core._types import Union, Callable, Optional, Dict, List, Literal
3
+
4
+ from scrapling.engines.toolbelt import (
5
+ Response,
6
+ do_nothing,
7
+ get_os_name,
8
+ intercept_route,
9
+ check_type_validity,
10
+ construct_proxy_dict,
11
+ generate_convincing_referer,
12
+ )
13
+
14
+ from camoufox.sync_api import Camoufox
15
+
16
+
17
+ class CamoufoxEngine:
18
+ def __init__(
19
+ self, headless: Optional[Union[bool, Literal['virtual']]] = True, block_images: Optional[bool] = False, disable_resources: Optional[bool] = False,
20
+ block_webrtc: Optional[bool] = False, allow_webgl: Optional[bool] = False, network_idle: Optional[bool] = False, humanize: Optional[Union[bool, float]] = True,
21
+ timeout: Optional[float] = 30000, page_action: Callable = do_nothing, wait_selector: Optional[str] = None, addons: Optional[List[str]] = None,
22
+ wait_selector_state: str = 'attached', google_search: Optional[bool] = True, extra_headers: Optional[Dict[str, str]] = None,
23
+ proxy: Optional[Union[str, Dict[str, str]]] = None, os_randomize: Optional[bool] = None, adaptor_arguments: Dict = None
24
+ ):
25
+ """An engine that utilizes Camoufox library, check the `StealthyFetcher` class for more documentation.
26
+
27
+ :param headless: Run the browser in headless/hidden (default), virtual screen mode, or headful/visible mode.
28
+ :param block_images: Prevent the loading of images through Firefox preferences.
29
+ This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
30
+ :param disable_resources: Drop requests of unnecessary resources for a speed boost. It depends but it made requests ~25% faster in my tests for some websites.
31
+ Requests dropped are of type `font`, `image`, `media`, `beacon`, `object`, `imageset`, `texttrack`, `websocket`, `csp_report`, and `stylesheet`.
32
+ This can help save your proxy usage but be careful with this option as it makes some websites never finish loading.
33
+ :param block_webrtc: Blocks WebRTC entirely.
34
+ :param addons: List of Firefox addons to use. Must be paths to extracted addons.
35
+ :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
36
+ :param allow_webgl: Whether to allow WebGL. To prevent leaks, only use this for special cases.
37
+ :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
38
+ :param os_randomize: If enabled, Scrapling will randomize the OS fingerprints used. The default is Scrapling matching the fingerprints with the current OS.
39
+ :param timeout: The timeout in milliseconds that is used in all operations and waits through the page. The default is 30000
40
+ :param page_action: Added for automation. A function that takes the `page` object, does the automation you need, then returns `page` again.
41
+ :param wait_selector: Wait for a specific css selector to be in a specific state.
42
+ :param wait_selector_state: The state to wait for the selector given with `wait_selector`. Default state is `attached`.
43
+ :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search for this website's domain name.
44
+ :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
45
+ :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
46
+ :param adaptor_arguments: The arguments that will be passed in the end while creating the final Adaptor's class.
47
+ """
48
+ self.headless = headless
49
+ self.block_images = bool(block_images)
50
+ self.disable_resources = bool(disable_resources)
51
+ self.block_webrtc = bool(block_webrtc)
52
+ self.allow_webgl = bool(allow_webgl)
53
+ self.network_idle = bool(network_idle)
54
+ self.google_search = bool(google_search)
55
+ self.os_randomize = bool(os_randomize)
56
+ self.extra_headers = extra_headers or {}
57
+ self.proxy = construct_proxy_dict(proxy)
58
+ self.addons = addons or []
59
+ self.humanize = humanize
60
+ self.timeout = check_type_validity(timeout, [int, float], 30000)
61
+ if callable(page_action):
62
+ self.page_action = page_action
63
+ else:
64
+ self.page_action = do_nothing
65
+ logging.error('[Ignored] Argument "page_action" must be callable')
66
+
67
+ self.wait_selector = wait_selector
68
+ self.wait_selector_state = wait_selector_state
69
+ self.adaptor_arguments = adaptor_arguments if adaptor_arguments else {}
70
+
71
+ def fetch(self, url: str) -> Response:
72
+ """Opens up the browser and do your request based on your chosen options.
73
+
74
+ :param url: Target url.
75
+ :return: A `Response` object that is the same as `Adaptor` object except it has these added attributes: `status`, `reason`, `cookies`, `headers`, and `request_headers`
76
+ """
77
+ with Camoufox(
78
+ proxy=self.proxy,
79
+ addons=self.addons,
80
+ headless=self.headless,
81
+ humanize=self.humanize,
82
+ i_know_what_im_doing=True, # To turn warnings off with the user configurations
83
+ allow_webgl=self.allow_webgl,
84
+ block_webrtc=self.block_webrtc,
85
+ block_images=self.block_images, # Careful! This stops some websites (e.g. stackoverflow) from ever finishing loading, even in headful mode
86
+ os=None if self.os_randomize else get_os_name(),
87
+ ) as browser:
88
+ page = browser.new_page()
89
+ page.set_default_navigation_timeout(self.timeout)
90
+ page.set_default_timeout(self.timeout)
91
+ if self.disable_resources:
92
+ page.route("**/*", intercept_route)
93
+
94
+ if self.extra_headers:
95
+ page.set_extra_http_headers(self.extra_headers)
96
+
97
+ res = page.goto(url, referer=generate_convincing_referer(url) if self.google_search else None)
98
+ page.wait_for_load_state(state="domcontentloaded")
99
+ if self.network_idle:
100
+ page.wait_for_load_state('networkidle')
101
+
102
+ page = self.page_action(page)
103
+
104
+ if self.wait_selector and type(self.wait_selector) is str:
105
+ waiter = page.locator(self.wait_selector)
106
+ waiter.wait_for(state=self.wait_selector_state)
107
+
108
+ content_type = res.headers.get('content-type', '')
109
+ # Parse charset from content-type
110
+ encoding = 'utf-8' # default encoding
111
+ if 'charset=' in content_type.lower():
112
+ encoding = content_type.lower().split('charset=')[-1].split(';')[0].strip()
113
+
114
+ response = Response(
115
+ url=res.url,
116
+ text=page.content(),
117
+ content=res.body(),
118
+ status=res.status,
119
+ reason=res.status_text,
120
+ encoding=encoding,
121
+ cookies={cookie['name']: cookie['value'] for cookie in page.context.cookies()},
122
+ headers=res.all_headers(),
123
+ request_headers=res.request.all_headers(),
124
+ adaptor_arguments=self.adaptor_arguments
125
+ )
126
+ page.close()
127
+
128
+ return response
@@ -0,0 +1,108 @@
1
+ # Disable loading these resources for speed
2
+ DEFAULT_DISABLED_RESOURCES = [
3
+ 'font',
4
+ 'image',
5
+ 'media',
6
+ 'beacon',
7
+ 'object',
8
+ 'imageset',
9
+ 'texttrack',
10
+ 'websocket',
11
+ 'csp_report',
12
+ 'stylesheet',
13
+ ]
14
+
15
+ DEFAULT_STEALTH_FLAGS = [
16
+ # Explanation: https://peter.sh/experiments/chromium-command-line-switches/
17
+ # Generally this will make the browser faster and less detectable
18
+ '--no-pings',
19
+ '--incognito',
20
+ '--test-type',
21
+ '--lang=en-US',
22
+ '--mute-audio',
23
+ '--no-first-run',
24
+ '--disable-sync',
25
+ '--hide-scrollbars',
26
+ '--disable-logging',
27
+ '--start-maximized', # For headless check bypass
28
+ '--enable-async-dns',
29
+ '--disable-breakpad',
30
+ '--disable-infobars',
31
+ '--accept-lang=en-US',
32
+ '--use-mock-keychain',
33
+ '--disable-translate',
34
+ '--disable-extensions',
35
+ '--disable-voice-input',
36
+ '--window-position=0,0',
37
+ '--disable-wake-on-wifi',
38
+ '--ignore-gpu-blocklist',
39
+ '--enable-tcp-fast-open',
40
+ '--enable-web-bluetooth',
41
+ '--disable-hang-monitor',
42
+ '--password-store=basic',
43
+ '--disable-cloud-import',
44
+ '--disable-default-apps',
45
+ '--disable-print-preview',
46
+ '--disable-dev-shm-usage',
47
+ '--disable-popup-blocking',
48
+ '--metrics-recording-only',
49
+ '--disable-crash-reporter',
50
+ '--disable-partial-raster',
51
+ '--disable-gesture-typing',
52
+ '--disable-checker-imaging',
53
+ '--disable-prompt-on-repost',
54
+ '--force-color-profile=srgb',
55
+ '--font-render-hinting=none',
56
+ '--no-default-browser-check',
57
+ '--aggressive-cache-discard',
58
+ '--disable-component-update',
59
+ '--disable-cookie-encryption',
60
+ '--disable-domain-reliability',
61
+ '--disable-threaded-animation',
62
+ '--disable-threaded-scrolling',
63
+ # '--disable-reading-from-canvas', # For Firefox
64
+ '--enable-simple-cache-backend',
65
+ '--disable-background-networking',
66
+ '--disable-session-crashed-bubble',
67
+ '--enable-surface-synchronization',
68
+ '--disable-image-animation-resync',
69
+ '--disable-renderer-backgrounding',
70
+ '--disable-ipc-flooding-protection',
71
+ '--prerender-from-omnibox=disabled',
72
+ '--safebrowsing-disable-auto-update',
73
+ '--disable-offer-upload-credit-cards',
74
+ '--disable-features=site-per-process',
75
+ '--disable-background-timer-throttling',
76
+ '--disable-new-content-rendering-timeout',
77
+ '--run-all-compositor-stages-before-draw',
78
+ '--disable-client-side-phishing-detection',
79
+ '--disable-backgrounding-occluded-windows',
80
+ '--disable-layer-tree-host-memory-pressure',
81
+ '--autoplay-policy=no-user-gesture-required',
82
+ '--disable-offer-store-unmasked-wallet-cards',
83
+ '--disable-blink-features=AutomationControlled',
84
+ '--webrtc-ip-handling-policy=disable_non_proxied_udp',
85
+ '--disable-component-extensions-with-background-pages',
86
+ '--force-webrtc-ip-handling-policy=disable_non_proxied_udp',
87
+ '--enable-features=NetworkService,NetworkServiceInProcess,TrustTokens,TrustTokensAlwaysAllowIssuance',
88
+ '--blink-settings=primaryHoverType=2,availableHoverTypes=2,primaryPointerType=4,availablePointerTypes=4',
89
+ '--disable-features=AudioServiceOutOfProcess,IsolateOrigins,site-per-process,TranslateUI,BlinkGenPropertyTrees',
90
+ ]
91
+
92
+ # Defaulting to the docker mode, token doesn't matter in it as it's passed for the container
93
+ NSTBROWSER_DEFAULT_QUERY = {
94
+ "once": True,
95
+ "headless": True,
96
+ "autoClose": True,
97
+ "fingerprint": {
98
+ "flags": {
99
+ "timezone": "BasedOnIp",
100
+ "screen": "Custom"
101
+ },
102
+ "platform": 'linux', # support: windows, mac, linux
103
+ "kernel": 'chromium', # only support: chromium
104
+ "kernelMilestone": '128',
105
+ "hardwareConcurrency": 8,
106
+ "deviceMemory": 8,
107
+ },
108
+ }