horaa-tls 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Amit Haina
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,110 @@
1
+ Metadata-Version: 2.4
2
+ Name: horaa-tls
3
+ Version: 0.1.0
4
+ Summary: Advanced HTTP client with low-level TLS and browser fingerprint spoofing based on Go tls-client FFI
5
+ Author: Amit Haina
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.10
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Dynamic: license-file
13
+
14
+ # 🌌 Horaa TLS
15
+ *State-of-the-art in-process browser fingerprint emulation and HTTP client for Python.*
16
+
17
+ `horaa-tls` is a high-performance HTTP client designed to evade anti-bot security layers (such as Cloudflare Turnstile, Akamai, Imperva, and DataDome). By interfacing directly with a precompiled Go-based BoringSSL networking backend via a ctypes FFI wrapper, it maintains a footprint indistinguishable from a real web browser at both the TLS socket and HTTP/2 layer.
18
+
19
+ ---
20
+
21
+ ## 💡 Why Horaa TLS?
22
+
23
+ * **Zero External Dependencies**: Automatically detects your OS and architecture, downloads the matching precompiled Go libraries, and initializes everything dynamically without requiring third-party pip dependencies.
24
+ * **Cryptographic Emulation**: Leverages preset browser profiles (Chrome 133, Firefox 133, etc.) to negotiate matching TLS extensions, cipher suites, key share curves, and HTTP/2 settings.
25
+ * **Aligned User-Agents & Client Hints**: Keeps the `User-Agent` header and the Client Hints request headers (`Sec-Ch-Ua`, `Sec-Ch-Ua-Mobile`, `Sec-Ch-Ua-Platform`) aligned with the selected browser profile version, so that mismatched headers do not become an anti-bot detection signal.
26
+ * **Decoupled Middleware Hooks**: Register asynchronous or synchronous middleware layers (such as rotators and retries) directly in the request-response cycle.
27
+
28
+ ---
29
+
30
+ ## 🚀 Quick Start (The One-Minute Tour)
31
+
32
+ Initialize a session mimicking a Chrome 133 browser:
33
+
34
+ ```python
35
+ from horaa_tls import Session, ClientProfile
36
+
37
+ # Create a stateful, browser-emulating session
38
+ session = Session(profile=ClientProfile.CHROME_133)
39
+
40
+ try:
41
+ # Perform a request (headers, JA3/JA4, and Client Hints are automatically injected)
42
+ response = session.get("https://httpbingo.org/get")
43
+ print(f"Status Code: {response.status_code}")
44
+ print(response.json())
45
+ finally:
46
+ # Always close the session to release low-level FFI memory allocations
47
+ session.close()
48
+ ```
49
+
50
+ ---
51
+
52
+ ## 🛠️ Core Concepts & Advanced Guide
53
+
54
+ ### 🧬 Aligned Browser Profiles
55
+ `horaa-tls` currently offers pre-configured emulation profiles:
56
+
57
+ * **Chrome Series**: `chrome_103`, `chrome_110`, `chrome_120`, `chrome_133`
58
+ * **Firefox Series**: `firefox_117`, `firefox_123`, `firefox_133`
59
+ * **Safari Series**: `safari_16_0`, `safari_ios_17_0`
60
+ * **Opera Series**: `opera_90`
61
+
62
+ ### 🏗️ Stateful Middleware Pipeline
63
+ You can easily intercept, modify, or retry requests using the built-in middleware engine. Registering rotators or exponential backoffs is straightforward:
64
+
65
+ ```python
66
+ from horaa_tls import Session, ClientProfile
67
+ from horaa_tls.middleware.proxy import ProxyRotatorMiddleware
68
+ from horaa_tls.middleware.retry import RetryMiddleware
69
+
70
+ session = Session(profile=ClientProfile.CHROME_133)
71
+
72
+ # 1. State-aware proxy rotator with failover recovery
73
+ proxies = ["http://proxy1.example.com:8080", "http://proxy2.example.com:8080"]
74
+ session.middleware_pipeline.register(
75
+ ProxyRotatorMiddleware(proxies=proxies, mode="failover", max_failovers=3)
76
+ )
77
+
78
+ # 2. Exponential backoff retry handler for transport/network drops
79
+ session.middleware_pipeline.register(
80
+ RetryMiddleware(max_retries=3, backoff_factor=2.0, status_forcelist=[500, 502, 503, 504])
81
+ )
82
+
83
+ # Request executes automatically through the middleware pipeline
84
+ res = session.get("https://httpbingo.org/get")
85
+ session.close()
86
+ ```
87
+
88
+ ### 📦 Session Snapshots (Persistence)
89
+ Export and restore session states (cookies, custom headers, active proxies, and middleware indicators) to distribute scraper instances across servers or queues:
90
+
91
+ ```python
92
+ # Save current state
93
+ session_state_json = session.to_json()
94
+
95
+ # Recreate an identical session in a different worker/process
96
+ restored_session = Session.from_json(session_state_json)
97
+ ```
98
+
99
+ ### 🍪 Direct FFI Cookie Management
100
+ Interact directly with the Go-layer cookie jar for fine-grained token/session management:
101
+
102
+ ```python
103
+ # Read active cookies stored in the Go memory layer
104
+ cookies = session.get_cookies_from_backend("https://example.com")
105
+
106
+ # Inject cookies directly into the Go-layer FFI engine
107
+ session.add_cookies_to_backend("https://example.com", [
108
+ {"name": "session_token", "value": "token_value", "domain": ".example.com", "path": "/"}
109
+ ])
110
+ ```
@@ -0,0 +1,97 @@
1
+ # 🌌 Horaa TLS
2
+ *State-of-the-art in-process browser fingerprint emulation and HTTP client for Python.*
3
+
4
+ `horaa-tls` is a high-performance HTTP client designed to evade anti-bot security layers (such as Cloudflare Turnstile, Akamai, Imperva, and DataDome). By interfacing directly with a precompiled Go-based BoringSSL networking backend via a ctypes FFI wrapper, it maintains a footprint indistinguishable from a real web browser at both the TLS socket and HTTP/2 layer.
5
+
6
+ ---
7
+
8
+ ## 💡 Why Horaa TLS?
9
+
10
+ * **Zero External Dependencies**: Automatically detects your OS and architecture, downloads the matching precompiled Go libraries, and initializes everything dynamically without requiring third-party pip dependencies.
11
+ * **Cryptographic Emulation**: Leverages preset browser profiles (Chrome 133, Firefox 133, etc.) to negotiate matching TLS extensions, cipher suites, key share curves, and HTTP/2 settings.
12
+ * **Aligned User-Agents & Client Hints**: Keeps the `User-Agent` header and the Client Hints request headers (`Sec-Ch-Ua`, `Sec-Ch-Ua-Mobile`, `Sec-Ch-Ua-Platform`) aligned with the selected browser profile version, so that mismatched headers do not become an anti-bot detection signal.
13
+ * **Decoupled Middleware Hooks**: Register asynchronous or synchronous middleware layers (such as rotators and retries) directly in the request-response cycle.
14
+
15
+ ---
16
+
17
+ ## 🚀 Quick Start (The One-Minute Tour)
18
+
19
+ Initialize a session mimicking a Chrome 133 browser:
20
+
21
+ ```python
22
+ from horaa_tls import Session, ClientProfile
23
+
24
+ # Create a stateful, browser-emulating session
25
+ session = Session(profile=ClientProfile.CHROME_133)
26
+
27
+ try:
28
+ # Perform a request (headers, JA3/JA4, and Client Hints are automatically injected)
29
+ response = session.get("https://httpbingo.org/get")
30
+ print(f"Status Code: {response.status_code}")
31
+ print(response.json())
32
+ finally:
33
+ # Always close the session to release low-level FFI memory allocations
34
+ session.close()
35
+ ```
36
+
37
+ ---
38
+
39
+ ## 🛠️ Core Concepts & Advanced Guide
40
+
41
+ ### 🧬 Aligned Browser Profiles
42
+ `horaa-tls` currently offers pre-configured emulation profiles:
43
+
44
+ * **Chrome Series**: `chrome_103`, `chrome_110`, `chrome_120`, `chrome_133`
45
+ * **Firefox Series**: `firefox_117`, `firefox_123`, `firefox_133`
46
+ * **Safari Series**: `safari_16_0`, `safari_ios_17_0`
47
+ * **Opera Series**: `opera_90`
48
+
49
+ ### 🏗️ Stateful Middleware Pipeline
50
+ You can easily intercept, modify, or retry requests using the built-in middleware engine. Registering rotators or exponential backoffs is straightforward:
51
+
52
+ ```python
53
+ from horaa_tls import Session, ClientProfile
54
+ from horaa_tls.middleware.proxy import ProxyRotatorMiddleware
55
+ from horaa_tls.middleware.retry import RetryMiddleware
56
+
57
+ session = Session(profile=ClientProfile.CHROME_133)
58
+
59
+ # 1. State-aware proxy rotator with failover recovery
60
+ proxies = ["http://proxy1.example.com:8080", "http://proxy2.example.com:8080"]
61
+ session.middleware_pipeline.register(
62
+ ProxyRotatorMiddleware(proxies=proxies, mode="failover", max_failovers=3)
63
+ )
64
+
65
+ # 2. Exponential backoff retry handler for transport/network drops
66
+ session.middleware_pipeline.register(
67
+ RetryMiddleware(max_retries=3, backoff_factor=2.0, status_forcelist=[500, 502, 503, 504])
68
+ )
69
+
70
+ # Request executes automatically through the middleware pipeline
71
+ res = session.get("https://httpbingo.org/get")
72
+ session.close()
73
+ ```
74
+
75
+ ### 📦 Session Snapshots (Persistence)
76
+ Export and restore session states (cookies, custom headers, active proxies, and middleware indicators) to distribute scraper instances across servers or queues:
77
+
78
+ ```python
79
+ # Save current state
80
+ session_state_json = session.to_json()
81
+
82
+ # Recreate an identical session in a different worker/process
83
+ restored_session = Session.from_json(session_state_json)
84
+ ```
85
+
86
+ ### 🍪 Direct FFI Cookie Management
87
+ Interact directly with the Go-layer cookie jar for fine-grained token/session management:
88
+
89
+ ```python
90
+ # Read active cookies stored in the Go memory layer
91
+ cookies = session.get_cookies_from_backend("https://example.com")
92
+
93
+ # Inject cookies directly into the Go-layer FFI engine
94
+ session.add_cookies_to_backend("https://example.com", [
95
+ {"name": "session_token", "value": "token_value", "domain": ".example.com", "path": "/"}
96
+ ])
97
+ ```
@@ -0,0 +1,13 @@
1
+ from horaa_tls.client import Session, ClientProfile
2
+ from horaa_tls.response import Response, CaseInsensitiveDict
3
+ from horaa_tls.exceptions import HoraaTLSError, BackendError, NetworkError
4
+
5
+ __all__ = [
6
+ "Session",
7
+ "ClientProfile",
8
+ "Response",
9
+ "CaseInsensitiveDict",
10
+ "HoraaTLSError",
11
+ "BackendError",
12
+ "NetworkError",
13
+ ]
@@ -0,0 +1,32 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any, Dict
3
+
4
+
5
class BaseBackend(ABC):
    """Interface that every pluggable request engine has to implement."""

    @abstractmethod
    def execute(self, request_payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Run one HTTP request and block until it has completed.

        Args:
            request_payload: Request parameters, keyed the way the concrete
                backend expects them.

        Returns:
            A dictionary describing the response.
        """

    @abstractmethod
    async def execute_async(self, request_payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Run one HTTP request as a coroutine.

        Args:
            request_payload: Request parameters, keyed the way the concrete
                backend expects them.

        Returns:
            A dictionary describing the response.
        """
@@ -0,0 +1,158 @@
1
+ import ctypes
2
+ import json
3
+ import asyncio
4
+ from typing import Any, Dict
5
+
6
+ from horaa_tls.backend.base import BaseBackend
7
+ from horaa_tls.exceptions import BackendError
8
+ from horaa_tls.utils.updater import update_if_necessary
9
+
10
+
11
class CtypesGoBackend(BaseBackend):
    """
    Backend implementation that loads the compiled Go tls-client library
    via ctypes and invokes it in-process.

    The loaded library handle is cached on the class, so all instances share it.
    """

    # Cached ctypes handle; populated by the first successful get_library() call.
    _lib = None

    @classmethod
    def get_library(cls):
        """
        Return the ctypes handle of the Go shared library, loading it on first use.

        Returns:
            The configured ctypes library object (cached on the class).

        Raises:
            BackendError: If locating, loading, or configuring the library fails.
        """
        if cls._lib is None:
            try:
                # Retrieve (and download if needed) the precompiled binary
                lib_path = update_if_necessary()
                lib = ctypes.cdll.LoadLibrary(lib_path)

                # Every payload-taking export receives one C string (a JSON
                # document) and is declared as returning one C string.
                # NOTE(review): freeMemory is declared with a c_char_p return
                # like the others; if the Go export actually returns void this
                # should be `restype = None` -- confirm against the library.
                for export_name in (
                    "request",
                    "freeMemory",
                    "getCookiesFromSession",
                    "addCookiesToSession",
                    "destroySession",
                ):
                    export = getattr(lib, export_name)
                    export.argtypes = [ctypes.c_char_p]
                    export.restype = ctypes.c_char_p

                lib.destroyAll.argtypes = []
                lib.destroyAll.restype = ctypes.c_char_p

                # Publish the handle only once it is fully configured.
                cls._lib = lib
            except Exception as e:
                raise BackendError(
                    f"Failed to load and initialize Go shared library: {e}"
                ) from e
        return cls._lib

    @staticmethod
    def _parse_response(response_ptr) -> Any:
        """Decode the UTF-8 JSON document that a Go export returned."""
        return json.loads(ctypes.string_at(response_ptr).decode("utf-8"))

    @staticmethod
    def _release_response(lib, response_obj: Any) -> None:
        """
        Call freeMemory for a parsed response, keyed by its "id" field.

        Responses that are not dictionaries, or that carry no string "id",
        cannot be identified and are skipped.
        """
        if not isinstance(response_obj, dict):
            return
        response_id = response_obj.get("id")
        if isinstance(response_id, str):
            lib.freeMemory(response_id.encode("utf-8"))

    def _execute_sync(self, request_payload: Dict[str, Any]) -> Dict[str, Any]:
        """
        Send one request through the Go library and return its parsed response.

        Args:
            request_payload: Request parameters. Keys starting with "_" are
                Python-side middleware state and are not forwarded.

        Returns:
            The JSON-decoded response produced by the library.

        Raises:
            BackendError: If the library returns nothing, or its output cannot
                be decoded as UTF-8 JSON.
        """
        lib = self.get_library()
        # Clean request payload by removing private keys starting with '_' (used for Python middleware state)
        clean_payload = {k: v for k, v in request_payload.items() if not k.startswith("_")}
        payload_bytes = json.dumps(clean_payload).encode("utf-8")

        # Call Go library to execute request
        response_ptr = lib.request(payload_bytes)
        if not response_ptr:
            raise BackendError("Null pointer returned from Go request execution.")

        try:
            response_data = self._parse_response(response_ptr)
        except Exception as e:
            # Without a parsed "id" there is nothing to hand to freeMemory.
            raise BackendError(f"Failed to parse Go response: {e}") from e

        # Release the response on the Go side now that it has been copied out.
        self._release_response(lib, response_data)
        return response_data

    def execute(self, request_payload: Dict[str, Any]) -> Dict[str, Any]:
        """Execute request synchronously."""
        return self._execute_sync(request_payload)

    async def execute_async(self, request_payload: Dict[str, Any]) -> Dict[str, Any]:
        """Execute request asynchronously by running the blocking ctypes call in an executor."""
        loop = asyncio.get_running_loop()
        # The FFI call blocks its calling thread until the request finishes, so
        # it runs on the loop's default executor to keep the event loop free.
        return await loop.run_in_executor(None, self._execute_sync, request_payload)

    def get_cookies(self, session_id: str, url: str) -> list:
        """
        Fetch cookies stored in the Go session memory for a given URL.

        Returns:
            The "cookies" entry of the library's response, or an empty list
            when the library returns nothing or the entry is absent.
        """
        lib = self.get_library()
        payload = json.dumps({"sessionId": session_id, "url": url}).encode("utf-8")
        response_ptr = lib.getCookiesFromSession(payload)

        if not response_ptr:
            return []

        res_obj = self._parse_response(response_ptr)
        self._release_response(lib, res_obj)
        return res_obj.get("cookies", [])

    def add_cookies(self, session_id: str, url: str, cookies: list) -> list:
        """
        Add cookies into Go session memory for a given URL.

        Returns:
            The "cookies" entry of the library's response, or an empty list
            when the library returns nothing or the entry is absent.
        """
        lib = self.get_library()
        payload = json.dumps({
            "sessionId": session_id,
            "url": url,
            "cookies": cookies,
        }).encode("utf-8")
        response_ptr = lib.addCookiesToSession(payload)

        if not response_ptr:
            return []

        res_obj = self._parse_response(response_ptr)
        self._release_response(lib, res_obj)
        return res_obj.get("cookies", [])

    def destroy_session(self, session_id: str) -> bool:
        """
        Destroys the session inside Go memory, releasing connections.

        Returns:
            The "success" entry of the library's response; False when the
            library returns nothing or the entry is absent.
        """
        lib = self.get_library()
        payload = json.dumps({"sessionId": session_id}).encode("utf-8")
        response_ptr = lib.destroySession(payload)

        if not response_ptr:
            return False

        res_obj = self._parse_response(response_ptr)
        self._release_response(lib, res_obj)
        return res_obj.get("success", False)

    def destroy_all_sessions(self) -> bool:
        """
        Destroys all active sessions inside Go memory.

        Returns:
            The "success" entry of the library's response; False when the
            library returns nothing or the entry is absent.
        """
        lib = self.get_library()
        response_ptr = lib.destroyAll()

        if not response_ptr:
            return False

        res_obj = self._parse_response(response_ptr)
        self._release_response(lib, res_obj)
        return res_obj.get("success", False)