hermex 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hermex-0.1.0/LICENSE +28 -0
- hermex-0.1.0/PKG-INFO +184 -0
- hermex-0.1.0/README.md +151 -0
- hermex-0.1.0/hermex/__init__.py +14 -0
- hermex-0.1.0/hermex/assets/bg_48.png +0 -0
- hermex-0.1.0/hermex/assets/bg_96.png +0 -0
- hermex-0.1.0/hermex/chatgpt.py +170 -0
- hermex-0.1.0/hermex/config.py +9 -0
- hermex-0.1.0/hermex/exceptions.py +2 -0
- hermex-0.1.0/hermex/gemini.py +212 -0
- hermex-0.1.0/hermex/gemini_watermark_remover.py +70 -0
- hermex-0.1.0/hermex/models.py +30 -0
- hermex-0.1.0/hermex/scraper_base.py +450 -0
- hermex-0.1.0/hermex/utils.py +43 -0
- hermex-0.1.0/hermex.egg-info/PKG-INFO +184 -0
- hermex-0.1.0/hermex.egg-info/SOURCES.txt +19 -0
- hermex-0.1.0/hermex.egg-info/dependency_links.txt +1 -0
- hermex-0.1.0/hermex.egg-info/requires.txt +11 -0
- hermex-0.1.0/hermex.egg-info/top_level.txt +1 -0
- hermex-0.1.0/pyproject.toml +52 -0
- hermex-0.1.0/setup.cfg +4 -0
hermex-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Usama (PSEUDO)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
The watermark removal logic in hermex/gemini_watermark_remover.py is adapted
|
|
26
|
+
from GeminiWatermarkTool (https://github.com/allenk/GeminiWatermarkTool) by
|
|
27
|
+
allenk, originally written in C++ and rewritten in Python for this project.
|
|
28
|
+
That work is also released under the MIT License.
|
hermex-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hermex
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Drive ChatGPT and Gemini from Python — no API keys, no billing, just the free web UI.
|
|
5
|
+
Author-email: Usama <pseudo.usama@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://hermex.usama.ai
|
|
8
|
+
Project-URL: Documentation, https://hermex.usama.ai
|
|
9
|
+
Project-URL: Repository, https://github.com/pseudo-usama/hermex
|
|
10
|
+
Keywords: chatgpt,gemini,llm,scraper,automation,selenium,browser
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Intended Audience :: Developers
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: selenium>=4.0
|
|
23
|
+
Requires-Dist: undetected-chromedriver>=3.5
|
|
24
|
+
Requires-Dist: pyperclip>=1.8
|
|
25
|
+
Requires-Dist: opencv-python>=4.0
|
|
26
|
+
Requires-Dist: platformdirs>=4.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: ruff; extra == "dev"
|
|
29
|
+
Requires-Dist: mkdocs; extra == "dev"
|
|
30
|
+
Requires-Dist: mkdocs-material; extra == "dev"
|
|
31
|
+
Requires-Dist: mkdocstrings[python]; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
<p align="center">
|
|
35
|
+
<img src="https://raw.githubusercontent.com/pseudo-usama/hermex/main/docs/assets/logo.svg" alt="Hermex" width="450" style="margin: 24px 0;"/>
|
|
36
|
+
<br>
|
|
37
|
+
<em>Drive ChatGPT and Gemini from Python — no API keys, no billing, just the free web UI.</em>
|
|
38
|
+
<br><br>
|
|
39
|
+
<a href="https://pypi.org/project/hermex"><img src="https://img.shields.io/pypi/v/hermex?color=3cb371" alt="PyPI"/></a>
|
|
40
|
+
<img src="https://img.shields.io/pypi/pyversions/hermex?color=3cb371" alt="Python 3.11+"/>
|
|
41
|
+
<img src="https://img.shields.io/badge/license-MIT-blue" alt="MIT License"/>
|
|
42
|
+
<a href="https://github.com/pseudo-usama/hermex"><img src="https://img.shields.io/badge/GitHub-Hermex-181717?logo=github" alt="GitHub Repo"/></a>
|
|
43
|
+
</p>
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
ChatGPT and Gemini are incredibly capable — but their official APIs are expensive, and for many tasks you simply don't need them. If you want to run OCR on an image, generate artwork, extract text from a screenshot, or just ask a quick question in a script, paying per-token for API access is overkill when the free web UI can do the same thing.
|
|
48
|
+
|
|
49
|
+
Hermex lets you drive ChatGPT and Gemini from Python just like a human would: it opens a real Chrome browser, types your message, uploads your images, waits for the response, and hands it back to you as a Python object. No API keys, no billing, no rate-limit tiers.
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from hermex import ChatGPT
|
|
53
|
+
|
|
54
|
+
response = ChatGPT.simple_query("What does this receipt say?", images=["receipt.jpg"])
|
|
55
|
+
print(response.text)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
It uses undetected-chromedriver under the hood to avoid bot detection, and reuses a persistent browser profile so your login session survives across runs.
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install hermex
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Requires Python 3.11+ and Google Chrome 130+.
|
|
67
|
+
|
|
68
|
+
## First-time setup
|
|
69
|
+
|
|
70
|
+
Hermex reuses a persistent Chrome profile so you only need to log in once:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from hermex import Gemini
|
|
74
|
+
|
|
75
|
+
Gemini.setup() # opens a browser — log in, browse briefly, then close the window
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
After setup, all future runs reuse the saved session automatically. Repeat this if your session expires.
|
|
79
|
+
|
|
80
|
+
Guest mode (no login) works for basic text queries on Gemini, but image upload requires a logged-in session. ChatGPT works without login for all features, including image upload.
|
|
81
|
+
|
|
82
|
+
## Usage
|
|
83
|
+
|
|
84
|
+
### Single query
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from hermex import Gemini, ChatGPT
|
|
88
|
+
|
|
89
|
+
# Gemini
|
|
90
|
+
gemini = Gemini()
|
|
91
|
+
gemini.open_url()
|
|
92
|
+
response = gemini.query("Summarize the history of the internet.")
|
|
93
|
+
print(response.text)
|
|
94
|
+
gemini.close()
|
|
95
|
+
|
|
96
|
+
# ChatGPT
|
|
97
|
+
chatgpt = ChatGPT()
|
|
98
|
+
chatgpt.open_url()
|
|
99
|
+
response = chatgpt.query("Summarize the history of the internet.")
|
|
100
|
+
print(response.text)
|
|
101
|
+
chatgpt.close()
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Sending images
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
response = gemini.query(
|
|
108
|
+
"Describe what's in this image.",
|
|
109
|
+
images=["photo.jpg"],
|
|
110
|
+
)
|
|
111
|
+
print(response.text)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### One-shot query
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from hermex import Gemini, ChatGPT
|
|
118
|
+
|
|
119
|
+
response = Gemini.simple_query("What is the capital of France?")
|
|
120
|
+
print(response.text)
|
|
121
|
+
|
|
122
|
+
response = ChatGPT.simple_query("What is the capital of France?")
|
|
123
|
+
print(response.text)
|
|
124
|
+
|
|
125
|
+
# With an image
|
|
126
|
+
response = Gemini.simple_query("Describe this image.", images=["photo.jpg"])
|
|
127
|
+
print(response.text)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## AssistantMessage object
|
|
131
|
+
|
|
132
|
+
`query()` and `get_last_response()` return an `AssistantMessage` dataclass:
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
@dataclass
|
|
136
|
+
class AssistantMessage:
|
|
137
|
+
text: str | None # plain text (or markdown if get_markdown=True)
|
|
138
|
+
image: Path | None # path to downloaded image, or None
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## API reference
|
|
142
|
+
|
|
143
|
+
Both `Gemini` and `ChatGPT` share the same interface — all methods below apply to both unless noted.
|
|
144
|
+
|
|
145
|
+
| Method | Description |
|
|
146
|
+
|---|---|
|
|
147
|
+
| `open_url(url, timeout)` | Open the chat interface in the browser |
|
|
148
|
+
| `send_message(message, submit, images, paste, fake_typing, typing_delay)` | Type and optionally submit a message |
|
|
149
|
+
| `query(message, timeout, images, paste, get_markdown, remove_watermark)` | Send a message, wait for the response, and return it |
|
|
150
|
+
| `get_last_response(get_markdown, remove_watermark)` | Retrieve the most recent response |
|
|
151
|
+
| `wait_until_idle(timeout)` | Block until the chatbot finishes generating |
|
|
152
|
+
| `get_state()` | Return the current UI state (`State.IDLE`, `State.GENERATING`, `State.TYPING`, or `State.UPLOADING`) |
|
|
153
|
+
| `simple_query(prompt, images, timeout)` | Class method — open, query, close in one call |
|
|
154
|
+
| `short_wait()` | Sleep ~7 seconds |
|
|
155
|
+
| `long_wait()` | Sleep ~5 minutes |
|
|
156
|
+
| `refresh_page()` | Reload the current page |
|
|
157
|
+
| `close()` | Close the browser |
|
|
158
|
+
| `setup()` | One-time login setup (class method) |
|
|
159
|
+
|
|
160
|
+
### Constructor options
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
Gemini(
|
|
164
|
+
chrome_version=None, # auto-detected from installed Chrome
|
|
165
|
+
download_dir=Path("."), # where generated images are saved
|
|
166
|
+
headless=False,
|
|
167
|
+
typing_delay=0.025, # seconds between keystrokes
|
|
168
|
+
disable_web_security=True,
|
|
169
|
+
)
|
|
170
|
+
# ChatGPT accepts the same parameters
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## Watermark removal
|
|
174
|
+
|
|
175
|
+
Gemini watermarks its generated images. Pass `remove_watermark=True` to strip it:
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
response = gemini.query("Generate an image of a sunset.", remove_watermark=True)
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Notes
|
|
182
|
+
|
|
183
|
+
- Bot detection is mitigated through per-character typing delays, fake typing before paste, a persistent browser profile, and a spoofed user agent. Avoid running headless for sensitive sessions.
|
|
184
|
+
- Browser profile and session data are stored in the platform data directory (`~/Library/Application Support/hermex` on macOS).
|
hermex-0.1.0/README.md
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="https://raw.githubusercontent.com/pseudo-usama/hermex/main/docs/assets/logo.svg" alt="Hermex" width="450" style="margin: 24px 0;"/>
|
|
3
|
+
<br>
|
|
4
|
+
<em>Drive ChatGPT and Gemini from Python — no API keys, no billing, just the free web UI.</em>
|
|
5
|
+
<br><br>
|
|
6
|
+
<a href="https://pypi.org/project/hermex"><img src="https://img.shields.io/pypi/v/hermex?color=3cb371" alt="PyPI"/></a>
|
|
7
|
+
<img src="https://img.shields.io/pypi/pyversions/hermex?color=3cb371" alt="Python 3.11+"/>
|
|
8
|
+
<img src="https://img.shields.io/badge/license-MIT-blue" alt="MIT License"/>
|
|
9
|
+
<a href="https://github.com/pseudo-usama/hermex"><img src="https://img.shields.io/badge/GitHub-Hermex-181717?logo=github" alt="GitHub Repo"/></a>
|
|
10
|
+
</p>
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
ChatGPT and Gemini are incredibly capable — but their official APIs are expensive, and for many tasks you simply don't need them. If you want to run OCR on an image, generate artwork, extract text from a screenshot, or just ask a quick question in a script, paying per-token for API access is overkill when the free web UI can do the same thing.
|
|
15
|
+
|
|
16
|
+
Hermex lets you drive ChatGPT and Gemini from Python just like a human would: it opens a real Chrome browser, types your message, uploads your images, waits for the response, and hands it back to you as a Python object. No API keys, no billing, no rate-limit tiers.
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
from hermex import ChatGPT
|
|
20
|
+
|
|
21
|
+
response = ChatGPT.simple_query("What does this receipt say?", images=["receipt.jpg"])
|
|
22
|
+
print(response.text)
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
It uses undetected-chromedriver under the hood to avoid bot detection, and reuses a persistent browser profile so your login session survives across runs.
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install hermex
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Requires Python 3.11+ and Google Chrome 130+.
|
|
34
|
+
|
|
35
|
+
## First-time setup
|
|
36
|
+
|
|
37
|
+
Hermex reuses a persistent Chrome profile so you only need to log in once:
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from hermex import Gemini
|
|
41
|
+
|
|
42
|
+
Gemini.setup() # opens a browser — log in, browse briefly, then close the window
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
After setup, all future runs reuse the saved session automatically. Repeat this if your session expires.
|
|
46
|
+
|
|
47
|
+
Guest mode (no login) works for basic text queries on Gemini, but image upload requires a logged-in session. ChatGPT works without login for all features, including image upload.
|
|
48
|
+
|
|
49
|
+
## Usage
|
|
50
|
+
|
|
51
|
+
### Single query
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from hermex import Gemini, ChatGPT
|
|
55
|
+
|
|
56
|
+
# Gemini
|
|
57
|
+
gemini = Gemini()
|
|
58
|
+
gemini.open_url()
|
|
59
|
+
response = gemini.query("Summarize the history of the internet.")
|
|
60
|
+
print(response.text)
|
|
61
|
+
gemini.close()
|
|
62
|
+
|
|
63
|
+
# ChatGPT
|
|
64
|
+
chatgpt = ChatGPT()
|
|
65
|
+
chatgpt.open_url()
|
|
66
|
+
response = chatgpt.query("Summarize the history of the internet.")
|
|
67
|
+
print(response.text)
|
|
68
|
+
chatgpt.close()
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Sending images
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
response = gemini.query(
|
|
75
|
+
"Describe what's in this image.",
|
|
76
|
+
images=["photo.jpg"],
|
|
77
|
+
)
|
|
78
|
+
print(response.text)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### One-shot query
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
from hermex import Gemini, ChatGPT
|
|
85
|
+
|
|
86
|
+
response = Gemini.simple_query("What is the capital of France?")
|
|
87
|
+
print(response.text)
|
|
88
|
+
|
|
89
|
+
response = ChatGPT.simple_query("What is the capital of France?")
|
|
90
|
+
print(response.text)
|
|
91
|
+
|
|
92
|
+
# With an image
|
|
93
|
+
response = Gemini.simple_query("Describe this image.", images=["photo.jpg"])
|
|
94
|
+
print(response.text)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## AssistantMessage object
|
|
98
|
+
|
|
99
|
+
`query()` and `get_last_response()` return an `AssistantMessage` dataclass:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
@dataclass
|
|
103
|
+
class AssistantMessage:
|
|
104
|
+
text: str | None # plain text (or markdown if get_markdown=True)
|
|
105
|
+
image: Path | None # path to downloaded image, or None
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## API reference
|
|
109
|
+
|
|
110
|
+
Both `Gemini` and `ChatGPT` share the same interface — all methods below apply to both unless noted.
|
|
111
|
+
|
|
112
|
+
| Method | Description |
|
|
113
|
+
|---|---|
|
|
114
|
+
| `open_url(url, timeout)` | Open the chat interface in the browser |
|
|
115
|
+
| `send_message(message, submit, images, paste, fake_typing, typing_delay)` | Type and optionally submit a message |
|
|
116
|
+
| `query(message, timeout, images, paste, get_markdown, remove_watermark)` | Send a message, wait for the response, and return it |
|
|
117
|
+
| `get_last_response(get_markdown, remove_watermark)` | Retrieve the most recent response |
|
|
118
|
+
| `wait_until_idle(timeout)` | Block until the chatbot finishes generating |
|
|
119
|
+
| `get_state()` | Return the current UI state (`State.IDLE`, `State.GENERATING`, `State.TYPING`, or `State.UPLOADING`) |
|
|
120
|
+
| `simple_query(prompt, images, timeout)` | Class method — open, query, close in one call |
|
|
121
|
+
| `short_wait()` | Sleep ~7 seconds |
|
|
122
|
+
| `long_wait()` | Sleep ~5 minutes |
|
|
123
|
+
| `refresh_page()` | Reload the current page |
|
|
124
|
+
| `close()` | Close the browser |
|
|
125
|
+
| `setup()` | One-time login setup (class method) |
|
|
126
|
+
|
|
127
|
+
### Constructor options
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
Gemini(
|
|
131
|
+
chrome_version=None, # auto-detected from installed Chrome
|
|
132
|
+
download_dir=Path("."), # where generated images are saved
|
|
133
|
+
headless=False,
|
|
134
|
+
typing_delay=0.025, # seconds between keystrokes
|
|
135
|
+
disable_web_security=True,
|
|
136
|
+
)
|
|
137
|
+
# ChatGPT accepts the same parameters
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Watermark removal
|
|
141
|
+
|
|
142
|
+
Gemini watermarks its generated images. Pass `remove_watermark=True` to strip it:
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
response = gemini.query("Generate an image of a sunset.", remove_watermark=True)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Notes
|
|
149
|
+
|
|
150
|
+
- Bot detection is mitigated through per-character typing delays, fake typing before paste, a persistent browser profile, and a spoofed user agent. Avoid running headless for sensitive sessions.
|
|
151
|
+
- Browser profile and session data are stored in the platform data directory (`~/Library/Application Support/hermex` on macOS).
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Public API of the hermex package: the two scraper front-ends, the shared
# data models, the login exception, and the data-reset helper.
from hermex.chatgpt import ChatGPT
from hermex.exceptions import LoginRequiredError
from hermex.gemini import Gemini
from hermex.models import AssistantMessage, State
from hermex.utils import clear_data

# Names exported by ``from hermex import *``.
__all__ = [
    "AssistantMessage", "State",
    "LoginRequiredError",
    "Gemini", "ChatGPT",
    "clear_data",
]
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import pyperclip
|
|
4
|
+
from selenium.common.exceptions import NoSuchElementException, TimeoutException
|
|
5
|
+
from selenium.webdriver.common.action_chains import ActionChains
|
|
6
|
+
from selenium.webdriver.common.by import By
|
|
7
|
+
from selenium.webdriver.common.keys import Keys
|
|
8
|
+
from selenium.webdriver.remote.webelement import WebElement
|
|
9
|
+
from selenium.webdriver.support import expected_conditions as EC
|
|
10
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
|
11
|
+
|
|
12
|
+
from hermex.config import SUPPORTED_IMAGE_EXTENSIONS
|
|
13
|
+
from hermex.models import AssistantMessage, State
|
|
14
|
+
from hermex.scraper_base import Scraper
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ChatGPT(Scraper):
    """
    Scraper for ChatGPT (chatgpt.com).

    Supports text queries, image uploads, and downloading generated images.
    Works without login for all current features including image upload.
    """

    def open_url(self, url="https://chatgpt.com", timeout=30):
        """
        Open a ChatGPT page in the browser.

        Args:
            url: Address to load. Must contain ``chatgpt.com``.
            timeout: Passed through unchanged to the base class ``open_url``.

        Returns:
            This instance, so calls can be chained.

        Raises:
            ValueError: If ``url`` does not contain ``chatgpt.com``.
        """
        # NOTE(review): this is a substring test, not a hostname check, so a
        # URL that merely mentions "chatgpt.com" in its path or query passes.
        if "chatgpt.com" not in url:
            raise ValueError(f"Expected a chatgpt.com URL, got: {url}")
        super().open_url(url, timeout)
        return self

    def wait_for_page_load(self, timeout: float = 30) -> None:
        """
        Block until the prompt editor is present in the DOM.

        The page is considered loaded once a ``contenteditable`` div exists.

        Args:
            timeout: Maximum number of seconds to wait.
        """
        WebDriverWait(self.driver, timeout).until(
            EC.presence_of_element_located(
                (By.CSS_SELECTOR, 'div[contenteditable="true"]')
            )
        )

    def _detect_login(self):
        """
        Set ``self.is_logged_in`` from the presence of the login button.

        A visible login button means the session is a guest session. Only a
        missing element counts as "logged in"; any other driver error (for
        example a dead browser session) propagates instead of being recorded
        as a successful login.
        """
        try:
            self.driver.find_element(
                By.CSS_SELECTOR, 'button[data-testid="login-button"]'
            )
        except NoSuchElementException:
            self.is_logged_in = True
        else:
            self.is_logged_in = False

    def send_message(
        self,
        message,
        submit=True,
        images: list[str | Path] | None = None,
        paste=False,
        fake_typing=True,
        typing_delay: float | None = None,
    ):
        """
        Enter a message into the prompt editor and optionally submit it.

        Args:
            message: Text to enter.
            submit: When true, send a newline to the editor after entering
                the text.
            images: Optional image paths to attach before the text is
                entered. ``None`` or an empty list attaches nothing.
            paste: When true, use the paste helper instead of typing.
            fake_typing: Forwarded to the paste helper; ignored when
                ``paste`` is false.
            typing_delay: Forwarded to the typing/paste helper unchanged.
                (presumably ``None`` selects the instance default - confirm
                against the base class)

        Returns:
            This instance, so calls can be chained.

        Raises:
            ValueError: If any image has an unsupported file extension.
        """
        if images:
            self._upload_imgs(images)

        wait = WebDriverWait(self.driver, 20)
        input_box = wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'div[contenteditable="true"]'))
        )
        input_box.click()
        self.sleep(0.5)

        if paste:
            self._paste_into(
                message, input_box, fake_typing=fake_typing, typing_delay=typing_delay
            )
        else:
            self._type_into(message, input_box, typing_delay=typing_delay)

        if images:
            # get_state() reports UPLOADING while the send button is disabled;
            # wait for TYPING so the message is not submitted too early.
            self._wait_until_state(State.TYPING)

        if submit:
            input_box.send_keys("\n")

        return self

    def _upload_imgs(self, image_paths: list[str | Path]):
        """
        Attach image files through the page's hidden file input.

        Every path is resolved and validated before the browser is touched,
        so an invalid entry aborts the upload without attaching anything.

        Args:
            image_paths: Paths of the images to attach.

        Raises:
            ValueError: If a file extension is not in
                ``SUPPORTED_IMAGE_EXTENSIONS``.
        """
        resolved = []
        for image_path in image_paths:
            image_path = Path(image_path).resolve()
            if image_path.suffix.lower() not in SUPPORTED_IMAGE_EXTENSIONS:
                raise ValueError(
                    f"Unsupported file type '{image_path.suffix}'. Must be one of: {SUPPORTED_IMAGE_EXTENSIONS}"
                )
            resolved.append(image_path)

        file_input = self.driver.find_element(By.CSS_SELECTOR, "#upload-photos")
        # Force the input to be displayed before sending the paths to it.
        self.driver.execute_script("arguments[0].style.display = 'block';", file_input)
        # Newline-separated paths select several files in one send_keys call.
        file_input.send_keys("\n".join(str(p) for p in resolved))

    def get_last_response(
        self, get_markdown=False, remove_watermark=False
    ) -> AssistantMessage:
        """
        Return the most recent assistant turn in the conversation.

        Args:
            get_markdown: When true, obtain the text via the response's
                "Copy response" button and the system clipboard. If that
                button is missing or the clipboard stays empty, the rendered
                plain text is returned instead.
            remove_watermark: Accepted for interface compatibility; unused.

        Returns:
            An ``AssistantMessage`` holding the response text and/or the
            downloaded image. Either field may be ``None``, but not both.

        Raises:
            TimeoutException: If no assistant turn appears within 20 seconds.
            RuntimeError: If the last turn has neither text nor an image.
        """
        # ChatGPT does not watermark generated images, so remove_watermark is a no-op.

        wait = WebDriverWait(self.driver, 20)

        def _get_img(element: WebElement):
            # Open the first image of the turn, save it through the viewer's
            # "Save" button, then dismiss the viewer with Escape.
            image_elems = element.find_elements(By.CSS_SELECTOR, "img")
            if not image_elems:
                raise NoSuchElementException("No image element in this response.")
            self.driver.execute_script("arguments[0].click();", image_elems[0])
            self.sleep(2)
            down_btn = wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, 'header button[aria-label="Save"]')
                )
            )
            self.driver.execute_script("arguments[0].click();", down_btn)
            img = self._get_downloaded_file()
            self.sleep(1)
            ActionChains(self.driver).send_keys(Keys.ESCAPE).perform()
            self.sleep(0.5)
            return img

        def _get_text(element: WebElement, get_markdown: bool):
            elem = element.find_element(By.CSS_SELECTOR, ".markdown")
            inner_text = elem.text.strip()
            if inner_text == "":
                return None
            if not get_markdown:
                return inner_text

            try:
                copy_btn = element.find_element(
                    By.CSS_SELECTOR, 'button[aria-label="Copy response"]'
                )
            except NoSuchElementException:
                # No copy button: keep the text already extracted rather
                # than reporting the response as having no text.
                return inner_text

            # Empty the clipboard first so a copy that silently fails cannot
            # hand back whatever unrelated content was on it before.
            pyperclip.copy("")
            copy_btn.click()
            self.sleep(0.5)
            return pyperclip.paste() or inner_text

        # until() only returns a non-empty list; it raises TimeoutException
        # otherwise, so no separate emptiness check is needed.
        responses = wait.until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".agent-turn"))
        )
        last_response = responses[-1]

        try:
            text_content = _get_text(last_response, get_markdown)
        except NoSuchElementException:
            text_content = None

        try:
            img = _get_img(last_response)
        except NoSuchElementException:
            img = None

        if text_content is None and img is None:
            raise RuntimeError("Response contained neither text nor image.")

        return AssistantMessage(text=text_content, image=img)

    def get_state(self) -> State:
        """
        Infer the current UI state from the composer buttons.

        Returns:
            ``State.GENERATING`` if a stop button is present; otherwise
            ``State.IDLE`` if there is no send button, ``State.UPLOADING``
            if the send button is disabled, and ``State.TYPING`` if it is
            enabled.
        """
        if self.driver.find_elements(By.CSS_SELECTOR, '[data-testid="stop-button"]'):
            return State.GENERATING

        try:
            send_btn = self.driver.find_element(
                By.CSS_SELECTOR, '[data-testid="send-button"]'
            )
        except NoSuchElementException:
            return State.IDLE

        if send_btn.get_attribute("disabled"):
            return State.UPLOADING
        return State.TYPING
|