aichat2md 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aichat2md/__init__.py +1 -1
- aichat2md/extractors/playwright_extractor.py +58 -8
- {aichat2md-1.0.1.dist-info → aichat2md-1.1.0.dist-info}/METADATA +26 -2
- {aichat2md-1.0.1.dist-info → aichat2md-1.1.0.dist-info}/RECORD +8 -8
- {aichat2md-1.0.1.dist-info → aichat2md-1.1.0.dist-info}/WHEEL +0 -0
- {aichat2md-1.0.1.dist-info → aichat2md-1.1.0.dist-info}/entry_points.txt +0 -0
- {aichat2md-1.0.1.dist-info → aichat2md-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {aichat2md-1.0.1.dist-info → aichat2md-1.1.0.dist-info}/top_level.txt +0 -0
aichat2md/__init__.py
CHANGED
(+1 -1; hunk not shown)
aichat2md/extractors/playwright_extractor.py
CHANGED
|
@@ -1,14 +1,51 @@
|
|
|
1
|
-
"""Extract content from
|
|
1
|
+
"""Extract content from AI chat share URLs using Playwright."""
|
|
2
2
|
|
|
3
3
|
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
|
|
4
4
|
|
|
5
5
|
|
|
6
|
+
def _detect_platform(url: str) -> str:
|
|
7
|
+
"""
|
|
8
|
+
Detect platform from URL.
|
|
9
|
+
|
|
10
|
+
Args:
|
|
11
|
+
url: Share URL
|
|
12
|
+
|
|
13
|
+
Returns:
|
|
14
|
+
Platform name: 'doubao', 'gemini', or 'default'
|
|
15
|
+
"""
|
|
16
|
+
url_lower = url.lower()
|
|
17
|
+
if 'doubao.com' in url_lower:
|
|
18
|
+
return 'doubao'
|
|
19
|
+
elif 'gemini.google.com' in url_lower or 'g.co' in url_lower:
|
|
20
|
+
return 'gemini'
|
|
21
|
+
else:
|
|
22
|
+
return 'default'
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _get_wait_time(platform: str) -> int:
|
|
26
|
+
"""
|
|
27
|
+
Get wait time in milliseconds for platform.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
platform: Platform name from _detect_platform
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
Wait time in milliseconds
|
|
34
|
+
"""
|
|
35
|
+
wait_times = {
|
|
36
|
+
'doubao': 3000,
|
|
37
|
+
'gemini': 5000,
|
|
38
|
+
'default': 2000
|
|
39
|
+
}
|
|
40
|
+
return wait_times.get(platform, 2000)
|
|
41
|
+
|
|
42
|
+
|
|
6
43
|
def extract_from_url(url: str, timeout: int = 30000) -> str:
|
|
7
44
|
"""
|
|
8
|
-
Extract text content from
|
|
45
|
+
Extract text content from AI chat share URL.
|
|
9
46
|
|
|
10
47
|
Args:
|
|
11
|
-
url:
|
|
48
|
+
url: Share URL (ChatGPT, Gemini, Doubao, etc.)
|
|
12
49
|
timeout: Page load timeout in milliseconds
|
|
13
50
|
|
|
14
51
|
Returns:
|
|
@@ -21,17 +58,30 @@ def extract_from_url(url: str, timeout: int = 30000) -> str:
|
|
|
21
58
|
if not url.startswith('http'):
|
|
22
59
|
raise ValueError(f"Invalid URL: {url}")
|
|
23
60
|
|
|
61
|
+
# Detect platform and get corresponding wait time
|
|
62
|
+
platform = _detect_platform(url)
|
|
63
|
+
wait_time = _get_wait_time(platform)
|
|
64
|
+
|
|
24
65
|
try:
|
|
25
66
|
with sync_playwright() as p:
|
|
26
67
|
browser = p.chromium.launch(headless=True)
|
|
27
68
|
page = browser.new_page()
|
|
28
69
|
|
|
29
|
-
# Navigate
|
|
30
|
-
|
|
70
|
+
# Navigate with appropriate wait strategy
|
|
71
|
+
# Use 'load' for Gemini/Doubao (networkidle may timeout due to ongoing requests)
|
|
72
|
+
wait_strategy = 'load' if platform in ['gemini', 'doubao'] else 'networkidle'
|
|
73
|
+
page.goto(url, wait_until=wait_strategy, timeout=timeout)
|
|
74
|
+
|
|
75
|
+
# Wait for content to load
|
|
76
|
+
# Try to wait for main selector (works for ChatGPT)
|
|
77
|
+
try:
|
|
78
|
+
page.wait_for_selector('main', timeout=10000)
|
|
79
|
+
except PlaywrightTimeoutError:
|
|
80
|
+
# Some platforms may not have 'main' element, continue anyway
|
|
81
|
+
pass
|
|
31
82
|
|
|
32
|
-
#
|
|
33
|
-
|
|
34
|
-
page.wait_for_selector('main', timeout=10000)
|
|
83
|
+
# Additional wait for dynamic content based on platform
|
|
84
|
+
page.wait_for_timeout(wait_time)
|
|
35
85
|
|
|
36
86
|
# Extract plain text from body
|
|
37
87
|
content = page.inner_text('body')
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aichat2md
|
|
3
|
-
Version: 1.0.1
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: Convert AI chat conversations to structured Markdown
|
|
5
5
|
Author: PlaceNameDay
|
|
6
6
|
License: MIT
|
|
@@ -32,7 +32,7 @@ Convert AI chat conversations to structured Markdown documents.
|
|
|
32
32
|
|
|
33
33
|
## Features
|
|
34
34
|
|
|
35
|
-
- 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
|
|
35
|
+
- 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
|
|
36
36
|
- 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
|
|
37
37
|
- 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
|
|
38
38
|
- 🌍 **Bilingual support** - English/Chinese prompts
|
|
@@ -55,6 +55,30 @@ aichat2md https://chatgpt.com/share/xxx
|
|
|
55
55
|
aichat2md ~/Downloads/chat.webarchive
|
|
56
56
|
```
|
|
57
57
|
|
|
58
|
+
## Supported Platforms
|
|
59
|
+
|
|
60
|
+
- **ChatGPT** - chatgpt.com share links
|
|
61
|
+
- **Gemini** - gemini.google.com or g.co share links
|
|
62
|
+
- **Doubao (豆包)** - doubao.com share links
|
|
63
|
+
- **Webarchive** - Safari exported .webarchive files (any platform)
|
|
64
|
+
|
|
65
|
+
### Usage Examples
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
# ChatGPT
|
|
69
|
+
aichat2md https://chatgpt.com/share/xxx
|
|
70
|
+
|
|
71
|
+
# Gemini (supports both long and short URLs)
|
|
72
|
+
aichat2md https://gemini.google.com/share/xxx
|
|
73
|
+
aichat2md https://g.co/gemini/share/xxx
|
|
74
|
+
|
|
75
|
+
# Doubao
|
|
76
|
+
aichat2md https://www.doubao.com/thread/xxx
|
|
77
|
+
|
|
78
|
+
# Webarchive file
|
|
79
|
+
aichat2md ~/Downloads/conversation.webarchive
|
|
80
|
+
```
|
|
81
|
+
|
|
58
82
|
## Supported AI Backends
|
|
59
83
|
|
|
60
84
|
- **DeepSeek** (default) - Cost-effective, Chinese service
|
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
aichat2md/__init__.py,sha256=
|
|
1
|
+
aichat2md/__init__.py,sha256=sxLgNjR13nToItNNkTgYXZAgeKNi2kfzjJGov_P_NBk,196
|
|
2
2
|
aichat2md/cli.py,sha256=bu_lnD85xLB-xKA04iMwj4WgKC0IkkJsHMnsJSA65H4,6905
|
|
3
3
|
aichat2md/config.py,sha256=VO4fA_ByRKVRPa61W3VwIBjMPDsMt3iagFP2NkBSU7U,4351
|
|
4
4
|
aichat2md/structurizer.py,sha256=0v1Hjo9KYcurBEaKJNt4MaqfVHzgEfHmH-KbIPO1Zcg,4213
|
|
5
5
|
aichat2md/extractors/__init__.py,sha256=HzIWd2aZBACnWs2N2pPjIa7vjM-azPz-bqEviN0QgTs,217
|
|
6
|
-
aichat2md/extractors/playwright_extractor.py,sha256=
|
|
6
|
+
aichat2md/extractors/playwright_extractor.py,sha256=mUfeo8A_NzHgtzTuLvwmBZXwPgvw7H7cSDUup4MS7q4,3066
|
|
7
7
|
aichat2md/extractors/webarchive_extractor.py,sha256=eIZIVzLlBgO41Yzz8EKmjA8Diq3btlQO8S5mljDQWfs,2842
|
|
8
8
|
aichat2md/prompts/__init__.py,sha256=cPdhDyL1QeVhl5gVFYb50zYMi24iGmxz6R_rrVy1-yk,48
|
|
9
9
|
aichat2md/prompts/system_prompt_en.txt,sha256=luB5o84AQOqCkBq0lM3KsrK_yyCO9yaYu8iqgg3lXoY,1488
|
|
10
10
|
aichat2md/prompts/system_prompt_zh.txt,sha256=UxiVgf2kUFp-iXA15nPDKBG7xdQAnhy3q9g5ki7bEPU,1344
|
|
11
|
-
aichat2md-1.0.
|
|
12
|
-
aichat2md-1.0.
|
|
13
|
-
aichat2md-1.0.
|
|
14
|
-
aichat2md-1.0.
|
|
15
|
-
aichat2md-1.0.
|
|
16
|
-
aichat2md-1.0.
|
|
11
|
+
aichat2md-1.1.0.dist-info/licenses/LICENSE,sha256=g3TWU1mkL2Cn4XEm7hRrNHQySEheXc1VVy7cyQoXOyA,1069
|
|
12
|
+
aichat2md-1.1.0.dist-info/METADATA,sha256=OxLygIoC-ScwCBedgHambY6kwddcBJaYDktXDqSxPfE,6873
|
|
13
|
+
aichat2md-1.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
14
|
+
aichat2md-1.1.0.dist-info/entry_points.txt,sha256=N_gW2xKLteEm0vGAnhMcJQ6y8uRpOdlG4f477os5VLE,49
|
|
15
|
+
aichat2md-1.1.0.dist-info/top_level.txt,sha256=o9-3lW1WoPj9xi0KCcPJLVRBmkO8lbuNqKq9tk0qnNA,10
|
|
16
|
+
aichat2md-1.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|