aichat2md 1.0.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aichat2md/__init__.py +1 -1
- aichat2md/cli.py +23 -5
- aichat2md/extractors/playwright_extractor.py +59 -9
- aichat2md/structurizer.py +3 -1
- {aichat2md-1.0.1.dist-info → aichat2md-1.2.0.dist-info}/METADATA +27 -2
- aichat2md-1.2.0.dist-info/RECORD +16 -0
- aichat2md-1.0.1.dist-info/RECORD +0 -16
- {aichat2md-1.0.1.dist-info → aichat2md-1.2.0.dist-info}/WHEEL +0 -0
- {aichat2md-1.0.1.dist-info → aichat2md-1.2.0.dist-info}/entry_points.txt +0 -0
- {aichat2md-1.0.1.dist-info → aichat2md-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {aichat2md-1.0.1.dist-info → aichat2md-1.2.0.dist-info}/top_level.txt +0 -0
aichat2md/__init__.py
CHANGED
aichat2md/cli.py
CHANGED
|
@@ -15,6 +15,9 @@ import sys
|
|
|
15
15
|
from pathlib import Path
|
|
16
16
|
from datetime import datetime
|
|
17
17
|
from typing import Tuple
|
|
18
|
+
import time
|
|
19
|
+
|
|
20
|
+
from yaspin import yaspin
|
|
18
21
|
|
|
19
22
|
from .config import setup_config, load_config
|
|
20
23
|
from .extractors.playwright_extractor import extract_from_url
|
|
@@ -23,6 +26,17 @@ from .structurizer import structurize_content
|
|
|
23
26
|
from . import __version__
|
|
24
27
|
|
|
25
28
|
|
|
29
|
+
class TimedText:
    """Dynamic text with elapsed time in seconds.

    The elapsed time is recomputed on every ``str()`` call, so anything that
    re-renders this object periodically shows a live ``[Ns]`` counter in
    front of the message.

    Args:
        text: Static message shown after the elapsed-time prefix.
    """

    def __init__(self, text: str):
        self.text = text
        # Use the monotonic clock: unlike time.time(), it never moves
        # backwards or jumps when the system clock is adjusted, so the
        # displayed counter cannot go negative or skip.
        self._start = time.monotonic()

    def __str__(self) -> str:
        # Truncate to whole seconds for a compact, stable label.
        elapsed = int(time.monotonic() - self._start)
        return f"[{elapsed}s] {self.text}"
|
|
38
|
+
|
|
39
|
+
|
|
26
40
|
def sanitize_filename(title: str, max_length: int = 50) -> str:
|
|
27
41
|
"""
|
|
28
42
|
Sanitize title for use as filename.
|
|
@@ -87,15 +101,17 @@ def extract_content(input_path: str) -> Tuple[str, str]:
|
|
|
87
101
|
Tuple of (extracted_text, source_identifier)
|
|
88
102
|
"""
|
|
89
103
|
if input_path.startswith('http'):
|
|
90
|
-
|
|
91
|
-
|
|
104
|
+
with yaspin(text=TimedText(f"Extracting from URL (up to 60s): {input_path}")) as sp:
|
|
105
|
+
text = extract_from_url(input_path)
|
|
106
|
+
sp.ok(f"✓ Extracted {len(text)} characters")
|
|
92
107
|
source = input_path
|
|
93
108
|
else:
|
|
109
|
+
# Webarchive extraction is fast, no spinner needed
|
|
94
110
|
print(f"📄 Extracting from webarchive: {input_path}")
|
|
95
111
|
text = extract_from_webarchive(input_path)
|
|
112
|
+
print(f"✓ Extracted {len(text)} characters")
|
|
96
113
|
source = Path(input_path).name
|
|
97
114
|
|
|
98
|
-
print(f"✓ Extracted {len(text)} characters")
|
|
99
115
|
return text, source
|
|
100
116
|
|
|
101
117
|
|
|
@@ -221,8 +237,10 @@ Examples:
|
|
|
221
237
|
|
|
222
238
|
# Structurize with AI
|
|
223
239
|
provider = config.get("api_base_url", "API")
|
|
224
|
-
|
|
225
|
-
|
|
240
|
+
estimated = min(60 + len(raw_text) // 100, 600)
|
|
241
|
+
with yaspin(text=TimedText(f"Structurizing {len(raw_text)} chars with {provider} (~{estimated}s)")) as sp:
|
|
242
|
+
markdown = structurize_content(raw_text, config, source)
|
|
243
|
+
sp.ok("✓ Structurized")
|
|
226
244
|
|
|
227
245
|
# Determine output path
|
|
228
246
|
output_path = determine_output_path(args.input, markdown, config, args.output)
|
|
@@ -1,14 +1,51 @@
|
|
|
1
|
-
"""Extract content from
|
|
1
|
+
"""Extract content from AI chat share URLs using Playwright."""
|
|
2
2
|
|
|
3
3
|
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
def
|
|
6
|
+
def _detect_platform(url: str) -> str:
    """
    Detect platform from URL.

    Detection is based on the URL's host name only. A platform domain that
    merely appears inside another host (for example ``bing.com`` contains the
    text ``g.co``), or inside a path or query string, is not treated as a
    match.

    Args:
        url: Share URL

    Returns:
        Platform name: 'doubao', 'gemini', or 'default'
    """
    # Local import keeps this helper self-contained.
    from urllib.parse import urlsplit

    def _is_host_of(host: str, domain: str) -> bool:
        # True for the domain itself or any of its subdomains.
        return host == domain or host.endswith('.' + domain)

    try:
        # A scheme-less value such as 'doubao.com/thread/x' parses with an
        # empty netloc; retry it as a network-path reference ('//host/...').
        host = urlsplit(url).hostname or urlsplit('//' + url).hostname or ''
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): no platform known.
        return 'default'

    # urlsplit already lower-cases the host; drop a fully-qualified trailing dot.
    host = host.rstrip('.')

    if _is_host_of(host, 'doubao.com'):
        return 'doubao'
    if _is_host_of(host, 'gemini.google.com') or _is_host_of(host, 'g.co'):
        return 'gemini'
    return 'default'
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _get_wait_time(platform: str) -> int:
    """
    Get wait time in milliseconds for platform.

    Args:
        platform: Platform name from _detect_platform

    Returns:
        Wait time in milliseconds; names not present in the table get the
        'default' entry's value.
    """
    wait_times = {
        'doubao': 3000,
        'gemini': 5000,
        'default': 2000,
    }
    # Fall back to the table's own 'default' entry rather than repeating the
    # literal, so the default wait is defined in exactly one place.
    return wait_times.get(platform, wait_times['default'])
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def extract_from_url(url: str, timeout: int = 60000) -> str:
|
|
44
|
+
"""
|
|
45
|
+
Extract text content from AI chat share URL.
|
|
46
|
+
|
|
47
|
+
Args:
|
|
48
|
+
url: Share URL (ChatGPT, Gemini, Doubao, etc.)
|
|
12
49
|
timeout: Page load timeout in milliseconds
|
|
13
50
|
|
|
14
51
|
Returns:
|
|
@@ -21,17 +58,30 @@ def extract_from_url(url: str, timeout: int = 30000) -> str:
|
|
|
21
58
|
if not url.startswith('http'):
|
|
22
59
|
raise ValueError(f"Invalid URL: {url}")
|
|
23
60
|
|
|
61
|
+
# Detect platform and get corresponding wait time
|
|
62
|
+
platform = _detect_platform(url)
|
|
63
|
+
wait_time = _get_wait_time(platform)
|
|
64
|
+
|
|
24
65
|
try:
|
|
25
66
|
with sync_playwright() as p:
|
|
26
67
|
browser = p.chromium.launch(headless=True)
|
|
27
68
|
page = browser.new_page()
|
|
28
69
|
|
|
29
|
-
# Navigate
|
|
30
|
-
|
|
70
|
+
# Navigate with appropriate wait strategy
|
|
71
|
+
# Use 'load' for Gemini/Doubao (networkidle may timeout due to ongoing requests)
|
|
72
|
+
wait_strategy = 'load' if platform in ['gemini', 'doubao'] else 'networkidle'
|
|
73
|
+
page.goto(url, wait_until=wait_strategy, timeout=60000)
|
|
74
|
+
|
|
75
|
+
# Wait for content to load
|
|
76
|
+
# Try to wait for main selector (works for ChatGPT)
|
|
77
|
+
try:
|
|
78
|
+
page.wait_for_selector('main', timeout=10000)
|
|
79
|
+
except PlaywrightTimeoutError:
|
|
80
|
+
# Some platforms may not have 'main' element, continue anyway
|
|
81
|
+
pass
|
|
31
82
|
|
|
32
|
-
#
|
|
33
|
-
|
|
34
|
-
page.wait_for_selector('main', timeout=10000)
|
|
83
|
+
# Additional wait for dynamic content based on platform
|
|
84
|
+
page.wait_for_timeout(wait_time)
|
|
35
85
|
|
|
36
86
|
# Extract plain text from body
|
|
37
87
|
content = page.inner_text('body')
|
aichat2md/structurizer.py
CHANGED
|
@@ -81,7 +81,9 @@ def structurize_content(
|
|
|
81
81
|
}
|
|
82
82
|
|
|
83
83
|
try:
|
|
84
|
-
|
|
84
|
+
# Dynamic timeout based on content size: 60s base + 1s per 100 chars, max 600s
|
|
85
|
+
estimated_timeout = min(60 + len(raw_text) // 100, 600)
|
|
86
|
+
response = requests.post(api_url, headers=headers, json=payload, timeout=estimated_timeout)
|
|
85
87
|
response.raise_for_status()
|
|
86
88
|
|
|
87
89
|
result = response.json()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aichat2md
|
|
3
|
-
Version: 1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Convert AI chat conversations to structured Markdown
|
|
5
5
|
Author: PlaceNameDay
|
|
6
6
|
License: MIT
|
|
@@ -24,6 +24,7 @@ Description-Content-Type: text/markdown
|
|
|
24
24
|
License-File: LICENSE
|
|
25
25
|
Requires-Dist: playwright>=1.40.0
|
|
26
26
|
Requires-Dist: requests>=2.31.0
|
|
27
|
+
Requires-Dist: yaspin>=3.0.0
|
|
27
28
|
Dynamic: license-file
|
|
28
29
|
|
|
29
30
|
# aichat2md
|
|
@@ -32,7 +33,7 @@ Convert AI chat conversations to structured Markdown documents.
|
|
|
32
33
|
|
|
33
34
|
## Features
|
|
34
35
|
|
|
35
|
-
- 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
|
|
36
|
+
- 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
|
|
36
37
|
- 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
|
|
37
38
|
- 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
|
|
38
39
|
- 🌍 **Bilingual support** - English/Chinese prompts
|
|
@@ -55,6 +56,30 @@ aichat2md https://chatgpt.com/share/xxx
|
|
|
55
56
|
aichat2md ~/Downloads/chat.webarchive
|
|
56
57
|
```
|
|
57
58
|
|
|
59
|
+
## Supported Platforms
|
|
60
|
+
|
|
61
|
+
- **ChatGPT** - chatgpt.com share links
|
|
62
|
+
- **Gemini** - gemini.google.com or g.co share links
|
|
63
|
+
- **Doubao (豆包)** - doubao.com share links
|
|
64
|
+
- **Webarchive** - Safari exported .webarchive files (any platform)
|
|
65
|
+
|
|
66
|
+
### Usage Examples
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
# ChatGPT
|
|
70
|
+
aichat2md https://chatgpt.com/share/xxx
|
|
71
|
+
|
|
72
|
+
# Gemini (supports both long and short URLs)
|
|
73
|
+
aichat2md https://gemini.google.com/share/xxx
|
|
74
|
+
aichat2md https://g.co/gemini/share/xxx
|
|
75
|
+
|
|
76
|
+
# Doubao
|
|
77
|
+
aichat2md https://www.doubao.com/thread/xxx
|
|
78
|
+
|
|
79
|
+
# Webarchive file
|
|
80
|
+
aichat2md ~/Downloads/conversation.webarchive
|
|
81
|
+
```
|
|
82
|
+
|
|
58
83
|
## Supported AI Backends
|
|
59
84
|
|
|
60
85
|
- **DeepSeek** (default) - Cost-effective, Chinese service
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
aichat2md/__init__.py,sha256=Svo2ZbRmmQpv6qMJ67s5OVCsKD-Z04C5eBAwgGeLE1o,196
|
|
2
|
+
aichat2md/cli.py,sha256=YI5kXkPAhxuFmKw5NV2J8_VwG0VoiyrCNkKkEmaVA3M,7545
|
|
3
|
+
aichat2md/config.py,sha256=VO4fA_ByRKVRPa61W3VwIBjMPDsMt3iagFP2NkBSU7U,4351
|
|
4
|
+
aichat2md/structurizer.py,sha256=rxK1qxm7RevfTlOPAyvzzL3CNb07dwjfn15V3BbQQ4I,4379
|
|
5
|
+
aichat2md/extractors/__init__.py,sha256=HzIWd2aZBACnWs2N2pPjIa7vjM-azPz-bqEviN0QgTs,217
|
|
6
|
+
aichat2md/extractors/playwright_extractor.py,sha256=GA7IoyuAKNWWX1-iw1en7yF0V-yW4tHZdiLvQTW9Ags,3064
|
|
7
|
+
aichat2md/extractors/webarchive_extractor.py,sha256=eIZIVzLlBgO41Yzz8EKmjA8Diq3btlQO8S5mljDQWfs,2842
|
|
8
|
+
aichat2md/prompts/__init__.py,sha256=cPdhDyL1QeVhl5gVFYb50zYMi24iGmxz6R_rrVy1-yk,48
|
|
9
|
+
aichat2md/prompts/system_prompt_en.txt,sha256=luB5o84AQOqCkBq0lM3KsrK_yyCO9yaYu8iqgg3lXoY,1488
|
|
10
|
+
aichat2md/prompts/system_prompt_zh.txt,sha256=UxiVgf2kUFp-iXA15nPDKBG7xdQAnhy3q9g5ki7bEPU,1344
|
|
11
|
+
aichat2md-1.2.0.dist-info/licenses/LICENSE,sha256=g3TWU1mkL2Cn4XEm7hRrNHQySEheXc1VVy7cyQoXOyA,1069
|
|
12
|
+
aichat2md-1.2.0.dist-info/METADATA,sha256=RX8SuEuYogm9jNj-pqGRYdp0h6pRHwD2VoUILASNexE,6902
|
|
13
|
+
aichat2md-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
14
|
+
aichat2md-1.2.0.dist-info/entry_points.txt,sha256=N_gW2xKLteEm0vGAnhMcJQ6y8uRpOdlG4f477os5VLE,49
|
|
15
|
+
aichat2md-1.2.0.dist-info/top_level.txt,sha256=o9-3lW1WoPj9xi0KCcPJLVRBmkO8lbuNqKq9tk0qnNA,10
|
|
16
|
+
aichat2md-1.2.0.dist-info/RECORD,,
|
aichat2md-1.0.1.dist-info/RECORD
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
aichat2md/__init__.py,sha256=DUbRGVZhGAyn7omeAomaWvlMQOLxhvgJeLhD5LIrNNY,196
|
|
2
|
-
aichat2md/cli.py,sha256=bu_lnD85xLB-xKA04iMwj4WgKC0IkkJsHMnsJSA65H4,6905
|
|
3
|
-
aichat2md/config.py,sha256=VO4fA_ByRKVRPa61W3VwIBjMPDsMt3iagFP2NkBSU7U,4351
|
|
4
|
-
aichat2md/structurizer.py,sha256=0v1Hjo9KYcurBEaKJNt4MaqfVHzgEfHmH-KbIPO1Zcg,4213
|
|
5
|
-
aichat2md/extractors/__init__.py,sha256=HzIWd2aZBACnWs2N2pPjIa7vjM-azPz-bqEviN0QgTs,217
|
|
6
|
-
aichat2md/extractors/playwright_extractor.py,sha256=eB3VLogTnv6uYm3DAfT_8t6CmIsyt3SIBo0Slgd7Rc4,1752
|
|
7
|
-
aichat2md/extractors/webarchive_extractor.py,sha256=eIZIVzLlBgO41Yzz8EKmjA8Diq3btlQO8S5mljDQWfs,2842
|
|
8
|
-
aichat2md/prompts/__init__.py,sha256=cPdhDyL1QeVhl5gVFYb50zYMi24iGmxz6R_rrVy1-yk,48
|
|
9
|
-
aichat2md/prompts/system_prompt_en.txt,sha256=luB5o84AQOqCkBq0lM3KsrK_yyCO9yaYu8iqgg3lXoY,1488
|
|
10
|
-
aichat2md/prompts/system_prompt_zh.txt,sha256=UxiVgf2kUFp-iXA15nPDKBG7xdQAnhy3q9g5ki7bEPU,1344
|
|
11
|
-
aichat2md-1.0.1.dist-info/licenses/LICENSE,sha256=g3TWU1mkL2Cn4XEm7hRrNHQySEheXc1VVy7cyQoXOyA,1069
|
|
12
|
-
aichat2md-1.0.1.dist-info/METADATA,sha256=PeiizU00Fmlrf_9YAuB_gJ38Vu1ZdhYfvta_7SRbHgY,6290
|
|
13
|
-
aichat2md-1.0.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
14
|
-
aichat2md-1.0.1.dist-info/entry_points.txt,sha256=N_gW2xKLteEm0vGAnhMcJQ6y8uRpOdlG4f477os5VLE,49
|
|
15
|
-
aichat2md-1.0.1.dist-info/top_level.txt,sha256=o9-3lW1WoPj9xi0KCcPJLVRBmkO8lbuNqKq9tk0qnNA,10
|
|
16
|
-
aichat2md-1.0.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|