hermex 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hermex-0.1.0/LICENSE ADDED
@@ -0,0 +1,28 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Usama (PSEUDO)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ ---
24
+
25
+ The watermark removal logic in hermex/gemini_watermark_remover.py is adapted
26
+ from GeminiWatermarkTool (https://github.com/allenk/GeminiWatermarkTool) by
27
+ allenk, originally written in C++ and rewritten in Python for this project.
28
+ That work is also released under the MIT License.
hermex-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,184 @@
1
+ Metadata-Version: 2.4
2
+ Name: hermex
3
+ Version: 0.1.0
4
+ Summary: Drive ChatGPT and Gemini from Python — no API keys, no billing, just the free web UI.
5
+ Author-email: Usama <pseudo.usama@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://hermex.usama.ai
8
+ Project-URL: Documentation, https://hermex.usama.ai
9
+ Project-URL: Repository, https://github.com/pseudo-usama/hermex
10
+ Keywords: chatgpt,gemini,llm,scraper,automation,selenium,browser
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: selenium>=4.0
23
+ Requires-Dist: undetected-chromedriver>=3.5
24
+ Requires-Dist: pyperclip>=1.8
25
+ Requires-Dist: opencv-python>=4.0
26
+ Requires-Dist: platformdirs>=4.0
27
+ Provides-Extra: dev
28
+ Requires-Dist: ruff; extra == "dev"
29
+ Requires-Dist: mkdocs; extra == "dev"
30
+ Requires-Dist: mkdocs-material; extra == "dev"
31
+ Requires-Dist: mkdocstrings[python]; extra == "dev"
32
+ Dynamic: license-file
33
+
34
+ <p align="center">
35
+ <img src="https://raw.githubusercontent.com/pseudo-usama/hermex/main/docs/assets/logo.svg" alt="Hermex" width="450" style="margin: 24px 0;"/>
36
+ <br>
37
+ <em>Drive ChatGPT and Gemini from Python — no API keys, no billing, just the free web UI.</em>
38
+ <br><br>
39
+ <a href="https://pypi.org/project/hermex"><img src="https://img.shields.io/pypi/v/hermex?color=3cb371" alt="PyPI"/></a>
40
+ <img src="https://img.shields.io/pypi/pyversions/hermex?color=3cb371" alt="Python 3.11+"/>
41
+ <img src="https://img.shields.io/badge/license-MIT-blue" alt="MIT License"/>
42
+ <a href="https://github.com/pseudo-usama/hermex"><img src="https://img.shields.io/badge/GitHub-Hermex-181717?logo=github" alt="GitHub Repo"/></a>
43
+ </p>
44
+
45
+ ---
46
+
47
+ ChatGPT and Gemini are incredibly capable — but their official APIs are expensive, and for many tasks you simply don't need them. If you want to run OCR on an image, generate artwork, extract text from a screenshot, or just ask a quick question in a script, paying per-token for API access is overkill when the free web UI can do the same thing.
48
+
49
+ Hermex lets you drive ChatGPT and Gemini from Python just like a human would: it opens a real Chrome browser, types your message, uploads your images, waits for the response, and hands it back to you as a Python object. No API keys, no billing, no rate-limit tiers.
50
+
51
+ ```python
52
+ from hermex import ChatGPT
53
+
54
+ response = ChatGPT.simple_query("What does this receipt say?", images=["receipt.jpg"])
55
+ print(response.text)
56
+ ```
57
+
58
+ It uses undetected-chromedriver under the hood to avoid bot detection, and reuses a persistent browser profile so your login session survives across runs.
59
+
60
+ ## Installation
61
+
62
+ ```bash
63
+ pip install hermex
64
+ ```
65
+
66
+ Requires Python 3.11+ and Google Chrome 130+.
67
+
68
+ ## First-time setup
69
+
70
+ Hermex reuses a persistent Chrome profile so you only need to log in once:
71
+
72
+ ```python
73
+ from hermex import Gemini
74
+
75
+ Gemini.setup() # opens a browser — log in, browse briefly, then close the window
76
+ ```
77
+
78
+ After setup, all future runs reuse the saved session automatically. Repeat this if your session expires.
79
+
80
+ Guest mode (no login) works for basic text queries on Gemini, but image upload requires a logged-in session. ChatGPT works without login for all features, including image upload.
81
+
82
+ ## Usage
83
+
84
+ ### Single query
85
+
86
+ ```python
87
+ from hermex import Gemini, ChatGPT
88
+
89
+ # Gemini
90
+ gemini = Gemini()
91
+ gemini.open_url()
92
+ response = gemini.query("Summarize the history of the internet.")
93
+ print(response.text)
94
+ gemini.close()
95
+
96
+ # ChatGPT
97
+ chatgpt = ChatGPT()
98
+ chatgpt.open_url()
99
+ response = chatgpt.query("Summarize the history of the internet.")
100
+ print(response.text)
101
+ chatgpt.close()
102
+ ```
103
+
104
+ ### Sending images
105
+
106
+ ```python
107
+ response = gemini.query(
108
+ "Describe what's in this image.",
109
+ images=["photo.jpg"],
110
+ )
111
+ print(response.text)
112
+ ```
113
+
114
+ ### One-shot query
115
+
116
+ ```python
117
+ from hermex import Gemini, ChatGPT
118
+
119
+ response = Gemini.simple_query("What is the capital of France?")
120
+ print(response.text)
121
+
122
+ response = ChatGPT.simple_query("What is the capital of France?")
123
+ print(response.text)
124
+
125
+ # With an image
126
+ response = Gemini.simple_query("Describe this image.", images=["photo.jpg"])
127
+ print(response.text)
128
+ ```
129
+
130
+ ## AssistantMessage object
131
+
132
+ `query()` and `get_last_response()` return an `AssistantMessage` dataclass:
133
+
134
+ ```python
135
+ @dataclass
136
+ class AssistantMessage:
137
+ text: str | None # plain text (or markdown if get_markdown=True)
138
+ image: Path | None # path to downloaded image, or None
139
+ ```
140
+
141
+ ## API reference
142
+
143
+ Both `Gemini` and `ChatGPT` share the same interface — all methods below apply to both unless noted.
144
+
145
+ | Method | Description |
146
+ |---|---|
147
+ | `open_url(url, timeout)` | Open the chat interface in the browser |
148
+ | `send_message(message, submit, images, paste, fake_typing, typing_delay)` | Type and optionally submit a message |
149
+ | `query(message, timeout, images, paste, get_markdown, remove_watermark)` | Send a message, wait for the response, and return it |
150
+ | `get_last_response(get_markdown, remove_watermark)` | Retrieve the most recent response |
151
+ | `wait_until_idle(timeout)` | Block until the chatbot finishes generating |
152
+ | `get_state()` | Return the current UI state (`State.IDLE`, `State.GENERATING`, `State.TYPING`, or `State.UPLOADING`) |
153
+ | `simple_query(prompt, images, timeout)` | Class method — open, query, close in one call |
154
+ | `short_wait()` | Sleep ~7 seconds |
155
+ | `long_wait()` | Sleep ~5 minutes |
156
+ | `refresh_page()` | Reload the current page |
157
+ | `close()` | Close the browser |
158
+ | `setup()` | One-time login setup (class method) |
159
+
160
+ ### Constructor options
161
+
162
+ ```python
163
+ Gemini(
164
+ chrome_version=None, # auto-detected from installed Chrome
165
+ download_dir=Path("."), # where generated images are saved
166
+ headless=False,
167
+ typing_delay=0.025, # seconds between keystrokes
168
+ disable_web_security=True,
169
+ )
170
+ # ChatGPT accepts the same parameters
171
+ ```
172
+
173
+ ## Watermark removal
174
+
175
+ Gemini watermarks its generated images. Pass `remove_watermark=True` to strip it:
176
+
177
+ ```python
178
+ response = gemini.query("Generate an image of a sunset.", remove_watermark=True)
179
+ ```
180
+
181
+ ## Notes
182
+
183
+ - Bot detection is mitigated through per-character typing delays, fake typing before paste, a persistent browser profile, and a spoofed user agent. Avoid running headless for sensitive sessions.
184
+ - Browser profile and session data are stored in the platform data directory (`~/Library/Application Support/hermex` on macOS).
hermex-0.1.0/README.md ADDED
@@ -0,0 +1,151 @@
1
+ <p align="center">
2
+ <img src="https://raw.githubusercontent.com/pseudo-usama/hermex/main/docs/assets/logo.svg" alt="Hermex" width="450" style="margin: 24px 0;"/>
3
+ <br>
4
+ <em>Drive ChatGPT and Gemini from Python — no API keys, no billing, just the free web UI.</em>
5
+ <br><br>
6
+ <a href="https://pypi.org/project/hermex"><img src="https://img.shields.io/pypi/v/hermex?color=3cb371" alt="PyPI"/></a>
7
+ <img src="https://img.shields.io/pypi/pyversions/hermex?color=3cb371" alt="Python 3.11+"/>
8
+ <img src="https://img.shields.io/badge/license-MIT-blue" alt="MIT License"/>
9
+ <a href="https://github.com/pseudo-usama/hermex"><img src="https://img.shields.io/badge/GitHub-Hermex-181717?logo=github" alt="GitHub Repo"/></a>
10
+ </p>
11
+
12
+ ---
13
+
14
+ ChatGPT and Gemini are incredibly capable — but their official APIs are expensive, and for many tasks you simply don't need them. If you want to run OCR on an image, generate artwork, extract text from a screenshot, or just ask a quick question in a script, paying per-token for API access is overkill when the free web UI can do the same thing.
15
+
16
+ Hermex lets you drive ChatGPT and Gemini from Python just like a human would: it opens a real Chrome browser, types your message, uploads your images, waits for the response, and hands it back to you as a Python object. No API keys, no billing, no rate-limit tiers.
17
+
18
+ ```python
19
+ from hermex import ChatGPT
20
+
21
+ response = ChatGPT.simple_query("What does this receipt say?", images=["receipt.jpg"])
22
+ print(response.text)
23
+ ```
24
+
25
+ It uses undetected-chromedriver under the hood to avoid bot detection, and reuses a persistent browser profile so your login session survives across runs.
26
+
27
+ ## Installation
28
+
29
+ ```bash
30
+ pip install hermex
31
+ ```
32
+
33
+ Requires Python 3.11+ and Google Chrome 130+.
34
+
35
+ ## First-time setup
36
+
37
+ Hermex reuses a persistent Chrome profile so you only need to log in once:
38
+
39
+ ```python
40
+ from hermex import Gemini
41
+
42
+ Gemini.setup() # opens a browser — log in, browse briefly, then close the window
43
+ ```
44
+
45
+ After setup, all future runs reuse the saved session automatically. Repeat this if your session expires.
46
+
47
+ Guest mode (no login) works for basic text queries on Gemini, but image upload requires a logged-in session. ChatGPT works without login for all features, including image upload.
48
+
49
+ ## Usage
50
+
51
+ ### Single query
52
+
53
+ ```python
54
+ from hermex import Gemini, ChatGPT
55
+
56
+ # Gemini
57
+ gemini = Gemini()
58
+ gemini.open_url()
59
+ response = gemini.query("Summarize the history of the internet.")
60
+ print(response.text)
61
+ gemini.close()
62
+
63
+ # ChatGPT
64
+ chatgpt = ChatGPT()
65
+ chatgpt.open_url()
66
+ response = chatgpt.query("Summarize the history of the internet.")
67
+ print(response.text)
68
+ chatgpt.close()
69
+ ```
70
+
71
+ ### Sending images
72
+
73
+ ```python
74
+ response = gemini.query(
75
+ "Describe what's in this image.",
76
+ images=["photo.jpg"],
77
+ )
78
+ print(response.text)
79
+ ```
80
+
81
+ ### One-shot query
82
+
83
+ ```python
84
+ from hermex import Gemini, ChatGPT
85
+
86
+ response = Gemini.simple_query("What is the capital of France?")
87
+ print(response.text)
88
+
89
+ response = ChatGPT.simple_query("What is the capital of France?")
90
+ print(response.text)
91
+
92
+ # With an image
93
+ response = Gemini.simple_query("Describe this image.", images=["photo.jpg"])
94
+ print(response.text)
95
+ ```
96
+
97
+ ## AssistantMessage object
98
+
99
+ `query()` and `get_last_response()` return an `AssistantMessage` dataclass:
100
+
101
+ ```python
102
+ @dataclass
103
+ class AssistantMessage:
104
+ text: str | None # plain text (or markdown if get_markdown=True)
105
+ image: Path | None # path to downloaded image, or None
106
+ ```
107
+
108
+ ## API reference
109
+
110
+ Both `Gemini` and `ChatGPT` share the same interface — all methods below apply to both unless noted.
111
+
112
+ | Method | Description |
113
+ |---|---|
114
+ | `open_url(url, timeout)` | Open the chat interface in the browser |
115
+ | `send_message(message, submit, images, paste, fake_typing, typing_delay)` | Type and optionally submit a message |
116
+ | `query(message, timeout, images, paste, get_markdown, remove_watermark)` | Send a message, wait for the response, and return it |
117
+ | `get_last_response(get_markdown, remove_watermark)` | Retrieve the most recent response |
118
+ | `wait_until_idle(timeout)` | Block until the chatbot finishes generating |
119
+ | `get_state()` | Return the current UI state (`State.IDLE`, `State.GENERATING`, `State.TYPING`, or `State.UPLOADING`) |
120
+ | `simple_query(prompt, images, timeout)` | Class method — open, query, close in one call |
121
+ | `short_wait()` | Sleep ~7 seconds |
122
+ | `long_wait()` | Sleep ~5 minutes |
123
+ | `refresh_page()` | Reload the current page |
124
+ | `close()` | Close the browser |
125
+ | `setup()` | One-time login setup (class method) |
126
+
127
+ ### Constructor options
128
+
129
+ ```python
130
+ Gemini(
131
+ chrome_version=None, # auto-detected from installed Chrome
132
+ download_dir=Path("."), # where generated images are saved
133
+ headless=False,
134
+ typing_delay=0.025, # seconds between keystrokes
135
+ disable_web_security=True,
136
+ )
137
+ # ChatGPT accepts the same parameters
138
+ ```
139
+
140
+ ## Watermark removal
141
+
142
+ Gemini watermarks its generated images. Pass `remove_watermark=True` to strip it:
143
+
144
+ ```python
145
+ response = gemini.query("Generate an image of a sunset.", remove_watermark=True)
146
+ ```
147
+
148
+ ## Notes
149
+
150
+ - Bot detection is mitigated through per-character typing delays, fake typing before paste, a persistent browser profile, and a spoofed user agent. Avoid running headless for sensitive sessions.
151
+ - Browser profile and session data are stored in the platform data directory (`~/Library/Application Support/hermex` on macOS).
@@ -0,0 +1,14 @@
1
+ from hermex.chatgpt import ChatGPT
2
+ from hermex.exceptions import LoginRequiredError
3
+ from hermex.gemini import Gemini
4
+ from hermex.models import AssistantMessage, State
5
+ from hermex.utils import clear_data
6
+
7
# Names re-exported as the package's public API (what `from hermex import *`
# binds). Grouped by the module each name is imported from above.
__all__ = [
    # Browser-driving front-ends
    "ChatGPT",
    "Gemini",
    # Data models
    "AssistantMessage",
    "State",
    # Exceptions
    "LoginRequiredError",
    # Utilities
    "clear_data",
]
Binary file
Binary file
@@ -0,0 +1,170 @@
1
+ from pathlib import Path
2
+
3
+ import pyperclip
4
+ from selenium.common.exceptions import NoSuchElementException, TimeoutException
5
+ from selenium.webdriver.common.action_chains import ActionChains
6
+ from selenium.webdriver.common.by import By
7
+ from selenium.webdriver.common.keys import Keys
8
+ from selenium.webdriver.remote.webelement import WebElement
9
+ from selenium.webdriver.support import expected_conditions as EC
10
+ from selenium.webdriver.support.ui import WebDriverWait
11
+
12
+ from hermex.config import SUPPORTED_IMAGE_EXTENSIONS
13
+ from hermex.models import AssistantMessage, State
14
+ from hermex.scraper_base import Scraper
15
+
16
+
17
class ChatGPT(Scraper):
    """
    Scraper for ChatGPT (chatgpt.com).

    Supports text queries, image uploads, and downloading generated images.
    Works without login for all current features including image upload.
    """

    def open_url(self, url="https://chatgpt.com", timeout=30) -> "ChatGPT":
        """Open a ChatGPT page in the browser.

        Args:
            url: Address to load; must contain ``chatgpt.com``.
            timeout: Forwarded unchanged to the base class ``open_url``.

        Returns:
            This instance, so calls can be chained.

        Raises:
            ValueError: If ``url`` does not contain ``chatgpt.com``.
        """
        if "chatgpt.com" not in url:
            raise ValueError(f"Expected a chatgpt.com URL, got: {url}")
        super().open_url(url, timeout)
        return self

    def wait_for_page_load(self, timeout: float = 30) -> None:
        """Block until the prompt editor element is present in the DOM.

        Args:
            timeout: Maximum number of seconds to wait.

        Raises:
            TimeoutException: If the editor does not appear within ``timeout``.
        """
        WebDriverWait(self.driver, timeout).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'div[contenteditable="true"]')
            )
        )

    def _detect_login(self):
        """Set ``self.is_logged_in`` from whether a login button is on the page.

        A visible-in-DOM login button means the session is a guest session.
        """
        # find_elements returns an empty list when nothing matches, so no
        # exception handling is needed. Genuine driver failures now propagate
        # instead of being misread as "logged in".
        login_buttons = self.driver.find_elements(
            By.CSS_SELECTOR, 'button[data-testid="login-button"]'
        )
        self.is_logged_in = not login_buttons

    def send_message(
        self,
        message,
        submit=True,
        images: list[str | Path] | None = None,
        paste=False,
        fake_typing=True,
        typing_delay: float | None = None,
    ) -> "ChatGPT":
        """Enter a message into the prompt editor and optionally submit it.

        Args:
            message: Text to put into the editor.
            submit: When true, send a newline to the editor after entering
                the text.
            images: Optional image files to attach before the text is entered.
            paste: When true, enter the text via ``_paste_into``; otherwise
                via ``_type_into`` (both provided by the base class).
            fake_typing: Forwarded to ``_paste_into``; unused when ``paste``
                is false.
            typing_delay: Forwarded to ``_paste_into`` / ``_type_into``.

        Returns:
            This instance, so calls can be chained.

        Raises:
            ValueError: If an attached file has an unsupported extension.
            TimeoutException: If the editor is not clickable within 20 seconds.
        """
        if images:
            self._upload_imgs(images)

        wait = WebDriverWait(self.driver, 20)
        input_box = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div[contenteditable="true"]'))
        )
        input_box.click()
        self.sleep(0.5)

        if paste:
            self._paste_into(
                message, input_box, fake_typing=fake_typing, typing_delay=typing_delay
            )
        else:
            self._type_into(message, input_box, typing_delay=typing_delay)

        if images:
            # get_state() reports UPLOADING while the send button is disabled;
            # hold off submitting until it reports TYPING (button enabled).
            self._wait_until_state(State.TYPING)

        if submit:
            input_box.send_keys("\n")

        return self

    def _upload_imgs(self, image_paths: list[str | Path]):
        """Attach image files through the page's ``#upload-photos`` file input.

        Every path is resolved and validated before anything is sent to the
        browser, so an invalid entry aborts the whole upload.

        Args:
            image_paths: Files to attach.

        Raises:
            ValueError: If a file's suffix is not in
                ``SUPPORTED_IMAGE_EXTENSIONS`` (compared case-insensitively).
        """
        resolved = []
        for image_path in image_paths:
            image_path = Path(image_path).resolve()
            if image_path.suffix.lower() not in SUPPORTED_IMAGE_EXTENSIONS:
                raise ValueError(
                    f"Unsupported file type '{image_path.suffix}'. Must be one of: {SUPPORTED_IMAGE_EXTENSIONS}"
                )
            resolved.append(image_path)

        file_input = self.driver.find_element(By.CSS_SELECTOR, "#upload-photos")
        # Force the input to be displayed before sending keys to it
        # (presumably it is hidden by the page's own styling).
        self.driver.execute_script("arguments[0].style.display = 'block';", file_input)
        # All paths go in one send_keys call, separated by newlines.
        file_input.send_keys("\n".join(str(p) for p in resolved))

    def get_last_response(
        self, get_markdown=False, remove_watermark=False
    ) -> AssistantMessage:
        """Return the most recent assistant turn in the conversation.

        Args:
            get_markdown: When true, obtain the text by clicking the
                response's copy button and reading the system clipboard
                instead of reading the rendered text.
            remove_watermark: Accepted for interface compatibility only.

        Returns:
            An ``AssistantMessage`` whose ``text`` and/or ``image`` is set.

        Raises:
            TimeoutException: If no ``.agent-turn`` element appears within
                20 seconds, or the image viewer's Save button does not.
            RuntimeError: If the last turn has neither text nor an image.
        """
        # ChatGPT does not watermark generated images, so remove_watermark is a no-op.

        wait = WebDriverWait(self.driver, 20)

        def _get_img(element: WebElement):
            """Download the first image of ``element`` and return the file."""
            image_elems = element.find_elements(By.CSS_SELECTOR, "img")
            if not image_elems:
                raise NoSuchElementException("No image element in this response.")
            # Clicking the image opens a viewer that holds the Save button.
            self.driver.execute_script("arguments[0].click();", image_elems[0])
            self.sleep(2)
            try:
                down_btn = wait.until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR, 'header button[aria-label="Save"]')
                    )
                )
                self.driver.execute_script("arguments[0].click();", down_btn)
                img = self._get_downloaded_file()
                self.sleep(1)
            finally:
                # Dismiss the viewer even when the download failed, so the
                # page is not left covered for subsequent calls.
                ActionChains(self.driver).send_keys(Keys.ESCAPE).perform()
                self.sleep(0.5)
            return img

        def _get_text(element: WebElement, get_markdown: bool):
            """Return the response text, or ``None`` when it is empty."""
            elem = element.find_element(By.CSS_SELECTOR, ".markdown")
            inner_text = elem.text.strip()
            if inner_text == "":
                return None
            if not get_markdown:
                return inner_text
            element.find_element(
                By.CSS_SELECTOR, 'button[aria-label="Copy response"]'
            ).click()
            # Short pause before reading the clipboard the button just filled.
            self.sleep(0.5)
            return pyperclip.paste()

        # WebDriverWait.until only returns a truthy value, so this list is
        # never empty; an absent response surfaces as TimeoutException.
        responses = wait.until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".agent-turn"))
        )
        last_response = responses[-1]

        try:
            text_content = _get_text(last_response, get_markdown)
        except NoSuchElementException:
            text_content = None

        try:
            img = _get_img(last_response)
        except NoSuchElementException:
            img = None

        if text_content is None and img is None:
            raise RuntimeError("Response contained neither text nor image.")

        return AssistantMessage(text=text_content, image=img)

    def get_state(self) -> State:
        """Classify the current UI state from the composer's buttons.

        Returns:
            ``State.GENERATING`` if a stop button is present; otherwise
            ``State.IDLE`` if there is no send button, ``State.UPLOADING`` if
            the send button is disabled, and ``State.TYPING`` if it is enabled.
        """
        if self.driver.find_elements(By.CSS_SELECTOR, '[data-testid="stop-button"]'):
            return State.GENERATING

        try:
            send_btn = self.driver.find_element(
                By.CSS_SELECTOR, '[data-testid="send-button"]'
            )
        except NoSuchElementException:
            return State.IDLE

        if send_btn.get_attribute("disabled"):
            return State.UPLOADING
        return State.TYPING
@@ -0,0 +1,9 @@
1
+ from pathlib import Path
2
+
3
+ from platformdirs import user_data_dir
4
+
5
# Wait durations, in seconds.
SHORT_WAIT = 7
LONG_WAIT = 5 * 60

# Oldest Chrome major version the package accepts.
MIN_CHROME_VERSION = 130

# File suffixes (lower-case, with leading dot) accepted for image upload.
SUPPORTED_IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg"}

# Per-user data directory for the package, as resolved by platformdirs.
data_dir = Path(user_data_dir("hermex", appauthor=False))
@@ -0,0 +1,2 @@
1
class LoginRequiredError(Exception):
    """Error type signalling that a logged-in session is required.

    Adds nothing to ``Exception``; it exists so callers can catch this
    condition specifically. The places that raise it are outside this module.
    """