NullGazeX 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nullgaze/__init__.py ADDED
@@ -0,0 +1,168 @@
1
+ """
2
+ NullGazeX — High-speed content retrieval with DPI bypass and TLS impersonation.
3
+
4
+ Provides two engines:
5
+ - ImageDownloader — download images through any firewall/protection
6
+ - PageScraper — scrape any page content undetected
7
+ """
8
+
9
+ from .downloader import ImageDownloader
10
+ from .scraper import PageScraper
11
+ from .exceptions import (
12
+ NullGazeError,
13
+ DownloadFailedError,
14
+ InvalidURLError,
15
+ ScrapeError,
16
+ BlockedError,
17
+ )
18
+
19
+ __version__ = "2.1.0"
20
+
21
+ # ------------------------------------------------------------------
22
+ # Image downloading (convenience)
23
+ # ------------------------------------------------------------------
24
def download_image(
    url: str,
    output_path: str,
    verbose: bool = False,
    headers: dict = None,
    race_timeout: float = 4.0,
) -> str:
    """
    Download one image using a freshly created ``ImageDownloader``.

    Args:
        url: Address of the image to fetch.
        output_path: Destination passed through to ``ImageDownloader.download``.
        verbose: Forwarded to the ``ImageDownloader`` constructor.
        headers: Optional extra headers, forwarded unchanged (``None`` by default).
        race_timeout: Forwarded unchanged to ``ImageDownloader.download``.

    Returns:
        Whatever ``ImageDownloader.download`` returns (annotated as ``str``).

    Note:
        The package describes the download as a parallel race between
        bypass strategies; that logic lives in the engine, not in this
        wrapper.
    """
    # Collect the pass-through options once, then hand them to the engine.
    options = {"headers": headers, "race_timeout": race_timeout}
    return ImageDownloader(verbose=verbose).download(url, output_path, **options)
40
+
41
+
42
def download_images(
    targets: list,
    max_workers: int = 20,
    verbose: bool = False,
    headers: dict = None,
    race_timeout: float = 4.0,
    adaptive_delay: bool = True,
) -> list:
    """
    Download several images through ``ImageDownloader.download_bulk``.

    Args:
        targets: Download targets, forwarded as-is to ``download_bulk``
            (the expected element shape is defined by the engine).
        max_workers: Forwarded unchanged to ``download_bulk``.
        verbose: Forwarded to the ``ImageDownloader`` constructor.
        headers: Optional extra headers, forwarded unchanged.
        race_timeout: Forwarded unchanged to ``download_bulk``.
        adaptive_delay: Forwarded unchanged; its effect is decided by
            ``download_bulk``.

    Returns:
        Whatever ``ImageDownloader.download_bulk`` returns (annotated as ``list``).
    """
    # Collect the pass-through options once, then hand them to the engine.
    bulk_options = {
        "max_workers": max_workers,
        "headers": headers,
        "race_timeout": race_timeout,
        "adaptive_delay": adaptive_delay,
    }
    return ImageDownloader(verbose=verbose).download_bulk(targets, **bulk_options)
61
+
62
+
63
+ # ------------------------------------------------------------------
64
+ # Page scraping (convenience)
65
+ # ------------------------------------------------------------------
66
def scrape_page(
    url: str,
    headers: dict = None,
    verbose: bool = False,
    race_timeout: float = 5.0,
) -> str:
    """
    Fetch a page through a freshly created ``PageScraper``.

    Args:
        url: Address of the page to fetch.
        headers: Optional extra headers, forwarded unchanged.
        verbose: Forwarded to the ``PageScraper`` constructor.
        race_timeout: Forwarded unchanged to ``PageScraper.scrape``.

    Returns:
        Whatever ``PageScraper.scrape`` returns (annotated as ``str``;
        described by the package as the raw HTML).

    Note:
        Transport details (proxying, TLS fingerprints) are implemented by
        the engine and are not visible from this wrapper.
    """
    options = {"headers": headers, "race_timeout": race_timeout}
    return PageScraper(verbose=verbose).scrape(url, **options)
80
+
81
+
82
def scrape_text(
    url: str,
    headers: dict = None,
    verbose: bool = False,
    race_timeout: float = 5.0,
) -> str:
    """
    Fetch a page and return the result of ``PageScraper.scrape_text``.

    Args:
        url: Address of the page to fetch.
        headers: Optional extra headers, forwarded unchanged.
        verbose: Forwarded to the ``PageScraper`` constructor.
        race_timeout: Forwarded unchanged to ``PageScraper.scrape_text``.

    Returns:
        Whatever ``PageScraper.scrape_text`` returns (annotated as ``str``;
        described by the package as plain-text content).
    """
    options = {"headers": headers, "race_timeout": race_timeout}
    return PageScraper(verbose=verbose).scrape_text(url, **options)
91
+
92
+
93
def scrape_title(
    url: str,
    headers: dict = None,
    verbose: bool = False,
    race_timeout: float = 5.0,
) -> str:
    """
    Fetch a page and return the result of ``PageScraper.scrape_title``.

    Args:
        url: Address of the page to fetch.
        headers: Optional extra headers, forwarded unchanged.
        verbose: Forwarded to the ``PageScraper`` constructor.
        race_timeout: Forwarded unchanged to ``PageScraper.scrape_title``.

    Returns:
        Whatever ``PageScraper.scrape_title`` returns (annotated as ``str``;
        described by the package as the page's <title> text).
    """
    options = {"headers": headers, "race_timeout": race_timeout}
    return PageScraper(verbose=verbose).scrape_title(url, **options)
102
+
103
+
104
def scrape_json(
    url: str,
    headers: dict = None,
    verbose: bool = False,
    race_timeout: float = 5.0,
):
    """
    Fetch an endpoint and return the result of ``PageScraper.scrape_json``.

    Args:
        url: Address of the endpoint to fetch.
        headers: Optional extra headers, forwarded unchanged.
        verbose: Forwarded to the ``PageScraper`` constructor.
        race_timeout: Forwarded unchanged to ``PageScraper.scrape_json``.

    Returns:
        Whatever ``PageScraper.scrape_json`` returns (described by the
        package as the parsed Python object).
    """
    options = {"headers": headers, "race_timeout": race_timeout}
    return PageScraper(verbose=verbose).scrape_json(url, **options)
113
+
114
+
115
def scrape_bulk(
    urls: list,
    max_workers: int = 15,
    headers: dict = None,
    verbose: bool = False,
    race_timeout: float = 5.0,
    adaptive_delay: bool = True,
) -> list:
    """
    Scrape several URLs through ``PageScraper.scrape_bulk``.

    Args:
        urls: Addresses to fetch, forwarded as-is to ``scrape_bulk``.
        max_workers: Forwarded unchanged to ``scrape_bulk``.
        headers: Optional extra headers, forwarded unchanged.
        verbose: Forwarded to the ``PageScraper`` constructor.
        race_timeout: Forwarded unchanged to ``scrape_bulk``.
        adaptive_delay: Forwarded unchanged; its effect is decided by
            ``scrape_bulk``.

    Returns:
        Whatever ``PageScraper.scrape_bulk`` returns (annotated as ``list``).
    """
    # Collect the pass-through options once, then hand them to the engine.
    bulk_options = {
        "max_workers": max_workers,
        "headers": headers,
        "race_timeout": race_timeout,
        "adaptive_delay": adaptive_delay,
    }
    return PageScraper(verbose=verbose).scrape_bulk(urls, **bulk_options)
132
+
133
+
134
+ # ------------------------------------------------------------------
135
+ # Engine pre-warming
136
+ # ------------------------------------------------------------------
137
def engine_prewarm():
    """
    Call ``prewarm()`` on the first direct base class of ``ImageDownloader``.

    The package describes this as pre-starting the shared proxy so that the
    first real request is fast; that behaviour is implemented by the base
    class and cannot be confirmed from this module.

    NOTE(review): the base class is located positionally via ``__bases__``,
    so this depends on the order in which ``ImageDownloader`` lists its
    parents — confirm against the class definition.
    """
    primary_base = ImageDownloader.__bases__[0]
    primary_base.prewarm()
144
+
145
+
146
+ # Public API surface
147
# Names exported by ``from nullgaze import *``, grouped by role.
__all__ = [
    # Engine classes
    "ImageDownloader", "PageScraper",
    # Image helpers
    "download_image", "download_images",
    # Scraping helpers
    "scrape_page", "scrape_text", "scrape_title", "scrape_json", "scrape_bulk",
    # Utility
    "engine_prewarm",
    # Exception types
    "NullGazeError", "DownloadFailedError", "InvalidURLError",
    "ScrapeError", "BlockedError",
]