aichat2md 1.0.1__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aichat2md/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """aichat2md - Convert AI chat conversations to structured Markdown."""
2
2
 
3
- __version__ = "1.0.1"
3
+ __version__ = "1.1.0"
4
4
  __author__ = "PlaceNameDay"
5
5
  __description__ = "Convert AI chat conversations to structured Markdown"
aichat2md/extractors/playwright_extractor.py CHANGED
@@ -1,14 +1,51 @@
1
- """Extract content from ChatGPT share URLs using Playwright."""
1
+ """Extract content from AI chat share URLs using Playwright."""
2
2
 
3
3
  from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
4
4
 
5
5
 
6
+ def _detect_platform(url: str) -> str:
7
+ """
8
+ Detect platform from URL.
9
+
10
+ Args:
11
+ url: Share URL
12
+
13
+ Returns:
14
+ Platform name: 'doubao', 'gemini', or 'default'
15
+ """
16
+ url_lower = url.lower()
17
+ if 'doubao.com' in url_lower:
18
+ return 'doubao'
19
+ elif 'gemini.google.com' in url_lower or 'g.co' in url_lower:
20
+ return 'gemini'
21
+ else:
22
+ return 'default'
23
+
24
+
25
+ def _get_wait_time(platform: str) -> int:
26
+ """
27
+ Get wait time in milliseconds for platform.
28
+
29
+ Args:
30
+ platform: Platform name from _detect_platform
31
+
32
+ Returns:
33
+ Wait time in milliseconds
34
+ """
35
+ wait_times = {
36
+ 'doubao': 3000,
37
+ 'gemini': 5000,
38
+ 'default': 2000
39
+ }
40
+ return wait_times.get(platform, 2000)
41
+
42
+
6
43
  def extract_from_url(url: str, timeout: int = 30000) -> str:
7
44
  """
8
- Extract text content from ChatGPT share URL.
45
+ Extract text content from AI chat share URL.
9
46
 
10
47
  Args:
11
- url: ChatGPT share URL (e.g., https://chatgpt.com/share/...)
48
+ url: Share URL (ChatGPT, Gemini, Doubao, etc.)
12
49
  timeout: Page load timeout in milliseconds
13
50
 
14
51
  Returns:
@@ -21,17 +58,30 @@ def extract_from_url(url: str, timeout: int = 30000) -> str:
21
58
  if not url.startswith('http'):
22
59
  raise ValueError(f"Invalid URL: {url}")
23
60
 
61
+ # Detect platform and get corresponding wait time
62
+ platform = _detect_platform(url)
63
+ wait_time = _get_wait_time(platform)
64
+
24
65
  try:
25
66
  with sync_playwright() as p:
26
67
  browser = p.chromium.launch(headless=True)
27
68
  page = browser.new_page()
28
69
 
29
- # Navigate and wait for network idle
30
- page.goto(url, wait_until='networkidle', timeout=timeout)
70
+ # Navigate with appropriate wait strategy
71
+ # Use 'load' for Gemini/Doubao (networkidle may timeout due to ongoing requests)
72
+ wait_strategy = 'load' if platform in ['gemini', 'doubao'] else 'networkidle'
73
+ page.goto(url, wait_until=wait_strategy, timeout=timeout)
74
+
75
+ # Wait for content to load
76
+ # Try to wait for main selector (works for ChatGPT)
77
+ try:
78
+ page.wait_for_selector('main', timeout=10000)
79
+ except PlaywrightTimeoutError:
80
+ # Some platforms may not have 'main' element, continue anyway
81
+ pass
31
82
 
32
- # Wait for conversation content to load
33
- # ChatGPT share pages typically have conversation in main content area
34
- page.wait_for_selector('main', timeout=10000)
83
+ # Additional wait for dynamic content based on platform
84
+ page.wait_for_timeout(wait_time)
35
85
 
36
86
  # Extract plain text from body
37
87
  content = page.inner_text('body')
aichat2md-1.0.1.dist-info/METADATA → aichat2md-1.1.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aichat2md
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  Summary: Convert AI chat conversations to structured Markdown
5
5
  Author: PlaceNameDay
6
6
  License: MIT
@@ -32,7 +32,7 @@ Convert AI chat conversations to structured Markdown documents.
32
32
 
33
33
  ## Features
34
34
 
35
- - 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
35
+ - 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
36
36
  - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
37
37
  - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
38
38
  - 🌍 **Bilingual support** - English/Chinese prompts
@@ -55,6 +55,30 @@ aichat2md https://chatgpt.com/share/xxx
55
55
  aichat2md ~/Downloads/chat.webarchive
56
56
  ```
57
57
 
58
+ ## Supported Platforms
59
+
60
+ - **ChatGPT** - chatgpt.com share links
61
+ - **Gemini** - gemini.google.com or g.co share links
62
+ - **Doubao (豆包)** - doubao.com share links
63
+ - **Webarchive** - Safari exported .webarchive files (any platform)
64
+
65
+ ### Usage Examples
66
+
67
+ ```bash
68
+ # ChatGPT
69
+ aichat2md https://chatgpt.com/share/xxx
70
+
71
+ # Gemini (supports both long and short URLs)
72
+ aichat2md https://gemini.google.com/share/xxx
73
+ aichat2md https://g.co/gemini/share/xxx
74
+
75
+ # Doubao
76
+ aichat2md https://www.doubao.com/thread/xxx
77
+
78
+ # Webarchive file
79
+ aichat2md ~/Downloads/conversation.webarchive
80
+ ```
81
+
58
82
  ## Supported AI Backends
59
83
 
60
84
  - **DeepSeek** (default) - Cost-effective, Chinese service
aichat2md-1.0.1.dist-info/RECORD → aichat2md-1.1.0.dist-info/RECORD CHANGED
@@ -1,16 +1,16 @@
1
- aichat2md/__init__.py,sha256=DUbRGVZhGAyn7omeAomaWvlMQOLxhvgJeLhD5LIrNNY,196
1
+ aichat2md/__init__.py,sha256=sxLgNjR13nToItNNkTgYXZAgeKNi2kfzjJGov_P_NBk,196
2
2
  aichat2md/cli.py,sha256=bu_lnD85xLB-xKA04iMwj4WgKC0IkkJsHMnsJSA65H4,6905
3
3
  aichat2md/config.py,sha256=VO4fA_ByRKVRPa61W3VwIBjMPDsMt3iagFP2NkBSU7U,4351
4
4
  aichat2md/structurizer.py,sha256=0v1Hjo9KYcurBEaKJNt4MaqfVHzgEfHmH-KbIPO1Zcg,4213
5
5
  aichat2md/extractors/__init__.py,sha256=HzIWd2aZBACnWs2N2pPjIa7vjM-azPz-bqEviN0QgTs,217
6
- aichat2md/extractors/playwright_extractor.py,sha256=eB3VLogTnv6uYm3DAfT_8t6CmIsyt3SIBo0Slgd7Rc4,1752
6
+ aichat2md/extractors/playwright_extractor.py,sha256=mUfeo8A_NzHgtzTuLvwmBZXwPgvw7H7cSDUup4MS7q4,3066
7
7
  aichat2md/extractors/webarchive_extractor.py,sha256=eIZIVzLlBgO41Yzz8EKmjA8Diq3btlQO8S5mljDQWfs,2842
8
8
  aichat2md/prompts/__init__.py,sha256=cPdhDyL1QeVhl5gVFYb50zYMi24iGmxz6R_rrVy1-yk,48
9
9
  aichat2md/prompts/system_prompt_en.txt,sha256=luB5o84AQOqCkBq0lM3KsrK_yyCO9yaYu8iqgg3lXoY,1488
10
10
  aichat2md/prompts/system_prompt_zh.txt,sha256=UxiVgf2kUFp-iXA15nPDKBG7xdQAnhy3q9g5ki7bEPU,1344
11
- aichat2md-1.0.1.dist-info/licenses/LICENSE,sha256=g3TWU1mkL2Cn4XEm7hRrNHQySEheXc1VVy7cyQoXOyA,1069
12
- aichat2md-1.0.1.dist-info/METADATA,sha256=PeiizU00Fmlrf_9YAuB_gJ38Vu1ZdhYfvta_7SRbHgY,6290
13
- aichat2md-1.0.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
14
- aichat2md-1.0.1.dist-info/entry_points.txt,sha256=N_gW2xKLteEm0vGAnhMcJQ6y8uRpOdlG4f477os5VLE,49
15
- aichat2md-1.0.1.dist-info/top_level.txt,sha256=o9-3lW1WoPj9xi0KCcPJLVRBmkO8lbuNqKq9tk0qnNA,10
16
- aichat2md-1.0.1.dist-info/RECORD,,
11
+ aichat2md-1.1.0.dist-info/licenses/LICENSE,sha256=g3TWU1mkL2Cn4XEm7hRrNHQySEheXc1VVy7cyQoXOyA,1069
12
+ aichat2md-1.1.0.dist-info/METADATA,sha256=OxLygIoC-ScwCBedgHambY6kwddcBJaYDktXDqSxPfE,6873
13
+ aichat2md-1.1.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
14
+ aichat2md-1.1.0.dist-info/entry_points.txt,sha256=N_gW2xKLteEm0vGAnhMcJQ6y8uRpOdlG4f477os5VLE,49
15
+ aichat2md-1.1.0.dist-info/top_level.txt,sha256=o9-3lW1WoPj9xi0KCcPJLVRBmkO8lbuNqKq9tk0qnNA,10
16
+ aichat2md-1.1.0.dist-info/RECORD,,