aichat2md 1.0.1__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {aichat2md-1.0.1 → aichat2md-1.1.0}/PKG-INFO +26 -2
  2. {aichat2md-1.0.1 → aichat2md-1.1.0}/README.md +25 -1
  3. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md/__init__.py +1 -1
  4. aichat2md-1.1.0/aichat2md/extractors/playwright_extractor.py +108 -0
  5. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md.egg-info/PKG-INFO +26 -2
  6. {aichat2md-1.0.1 → aichat2md-1.1.0}/pyproject.toml +1 -1
  7. aichat2md-1.0.1/aichat2md/extractors/playwright_extractor.py +0 -58
  8. {aichat2md-1.0.1 → aichat2md-1.1.0}/LICENSE +0 -0
  9. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md/cli.py +0 -0
  10. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md/config.py +0 -0
  11. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md/extractors/__init__.py +0 -0
  12. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md/extractors/webarchive_extractor.py +0 -0
  13. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md/prompts/__init__.py +0 -0
  14. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md/prompts/system_prompt_en.txt +0 -0
  15. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md/prompts/system_prompt_zh.txt +0 -0
  16. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md/structurizer.py +0 -0
  17. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md.egg-info/SOURCES.txt +0 -0
  18. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md.egg-info/dependency_links.txt +0 -0
  19. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md.egg-info/entry_points.txt +0 -0
  20. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md.egg-info/requires.txt +0 -0
  21. {aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md.egg-info/top_level.txt +0 -0
  22. {aichat2md-1.0.1 → aichat2md-1.1.0}/setup.cfg +0 -0
  23. {aichat2md-1.0.1 → aichat2md-1.1.0}/tests/test_cli.py +0 -0
  24. {aichat2md-1.0.1 → aichat2md-1.1.0}/tests/test_config.py +0 -0
{aichat2md-1.0.1 → aichat2md-1.1.0}/PKG-INFO +26 -2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aichat2md
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  Summary: Convert AI chat conversations to structured Markdown
5
5
  Author: PlaceNameDay
6
6
  License: MIT
@@ -32,7 +32,7 @@ Convert AI chat conversations to structured Markdown documents.
32
32
 
33
33
  ## Features
34
34
 
35
- - 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
35
+ - 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
36
36
  - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
37
37
  - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
38
38
  - 🌍 **Bilingual support** - English/Chinese prompts
@@ -55,6 +55,30 @@ aichat2md https://chatgpt.com/share/xxx
55
55
  aichat2md ~/Downloads/chat.webarchive
56
56
  ```
57
57
 
58
+ ## Supported Platforms
59
+
60
+ - **ChatGPT** - chatgpt.com share links
61
+ - **Gemini** - gemini.google.com or g.co share links
62
+ - **Doubao (豆包)** - doubao.com share links
63
+ - **Webarchive** - Safari exported .webarchive files (any platform)
64
+
65
+ ### Usage Examples
66
+
67
+ ```bash
68
+ # ChatGPT
69
+ aichat2md https://chatgpt.com/share/xxx
70
+
71
+ # Gemini (supports both long and short URLs)
72
+ aichat2md https://gemini.google.com/share/xxx
73
+ aichat2md https://g.co/gemini/share/xxx
74
+
75
+ # Doubao
76
+ aichat2md https://www.doubao.com/thread/xxx
77
+
78
+ # Webarchive file
79
+ aichat2md ~/Downloads/conversation.webarchive
80
+ ```
81
+
58
82
  ## Supported AI Backends
59
83
 
60
84
  - **DeepSeek** (default) - Cost-effective, Chinese service
{aichat2md-1.0.1 → aichat2md-1.1.0}/README.md +25 -1
@@ -4,7 +4,7 @@ Convert AI chat conversations to structured Markdown documents.
4
4
 
5
5
  ## Features
6
6
 
7
- - 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
7
+ - 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
8
8
  - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
9
9
  - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
10
10
  - 🌍 **Bilingual support** - English/Chinese prompts
@@ -27,6 +27,30 @@ aichat2md https://chatgpt.com/share/xxx
27
27
  aichat2md ~/Downloads/chat.webarchive
28
28
  ```
29
29
 
30
+ ## Supported Platforms
31
+
32
+ - **ChatGPT** - chatgpt.com share links
33
+ - **Gemini** - gemini.google.com or g.co share links
34
+ - **Doubao (豆包)** - doubao.com share links
35
+ - **Webarchive** - Safari exported .webarchive files (any platform)
36
+
37
+ ### Usage Examples
38
+
39
+ ```bash
40
+ # ChatGPT
41
+ aichat2md https://chatgpt.com/share/xxx
42
+
43
+ # Gemini (supports both long and short URLs)
44
+ aichat2md https://gemini.google.com/share/xxx
45
+ aichat2md https://g.co/gemini/share/xxx
46
+
47
+ # Doubao
48
+ aichat2md https://www.doubao.com/thread/xxx
49
+
50
+ # Webarchive file
51
+ aichat2md ~/Downloads/conversation.webarchive
52
+ ```
53
+
30
54
  ## Supported AI Backends
31
55
 
32
56
  - **DeepSeek** (default) - Cost-effective, Chinese service
{aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md/__init__.py +1 -1
@@ -1,5 +1,5 @@
1
1
  """aichat2md - Convert AI chat conversations to structured Markdown."""
2
2
 
3
- __version__ = "1.0.1"
3
+ __version__ = "1.1.0"
4
4
  __author__ = "PlaceNameDay"
5
5
  __description__ = "Convert AI chat conversations to structured Markdown"
aichat2md-1.1.0/aichat2md/extractors/playwright_extractor.py +108 -0
@@ -0,0 +1,108 @@
1
+ """Extract content from AI chat share URLs using Playwright."""
2
+
3
+ from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
4
+
5
+
6
+ def _detect_platform(url: str) -> str:
7
+ """
8
+ Detect platform from URL.
9
+
10
+ Args:
11
+ url: Share URL
12
+
13
+ Returns:
14
+ Platform name: 'doubao', 'gemini', or 'default'
15
+ """
16
+ url_lower = url.lower()
17
+ if 'doubao.com' in url_lower:
18
+ return 'doubao'
19
+ elif 'gemini.google.com' in url_lower or 'g.co' in url_lower:
20
+ return 'gemini'
21
+ else:
22
+ return 'default'
23
+
24
+
25
+ def _get_wait_time(platform: str) -> int:
26
+ """
27
+ Get wait time in milliseconds for platform.
28
+
29
+ Args:
30
+ platform: Platform name from _detect_platform
31
+
32
+ Returns:
33
+ Wait time in milliseconds
34
+ """
35
+ wait_times = {
36
+ 'doubao': 3000,
37
+ 'gemini': 5000,
38
+ 'default': 2000
39
+ }
40
+ return wait_times.get(platform, 2000)
41
+
42
+
43
def extract_from_url(url: str, timeout: int = 30000) -> str:
    """
    Extract text content from an AI chat share URL.

    Opens the URL in headless Chromium via Playwright, waits for the page
    to render, and returns the visible text of ``<body>``.

    Args:
        url: Share URL (ChatGPT, Gemini, Doubao, etc.). Must be an absolute
            http(s) URL.
        timeout: Page load timeout in milliseconds

    Returns:
        Extracted plain text content, stripped of surrounding whitespace

    Raises:
        PlaywrightTimeoutError: If a Playwright operation times out
            (typically the page failing to load within ``timeout``)
        ValueError: If URL is not a string or not an absolute http(s) URL
    """
    # Local import: the module's top-level imports only bring in Playwright.
    from urllib.parse import urlsplit

    # Validate up front so bad input fails fast with the documented
    # ValueError instead of launching a browser (or raising AttributeError
    # on a non-string).
    if not isinstance(url, str):
        raise ValueError(f"Invalid URL: {url}")
    url = url.strip()
    try:
        parts = urlsplit(url)
    except ValueError:
        raise ValueError(f"Invalid URL: {url}") from None
    if parts.scheme not in ('http', 'https') or not parts.netloc:
        raise ValueError(f"Invalid URL: {url}")

    # Detect platform and get corresponding wait time
    platform = _detect_platform(url)
    wait_time = _get_wait_time(platform)

    # Use 'load' for Gemini/Doubao: 'networkidle' may time out there because
    # of ongoing background requests.
    wait_strategy = 'load' if platform in ('gemini', 'doubao') else 'networkidle'

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                page.goto(url, wait_until=wait_strategy, timeout=timeout)

                # Try to wait for the 'main' element (works for ChatGPT).
                try:
                    page.wait_for_selector('main', timeout=10000)
                except PlaywrightTimeoutError:
                    # Some platforms may not have a 'main' element;
                    # continue with whatever has rendered.
                    pass

                # Additional wait for dynamic content based on platform
                page.wait_for_timeout(wait_time)

                # Extract plain text from body
                content = page.inner_text('body')
            finally:
                # Close the browser on failure too, not only on success.
                browser.close()

        return content.strip()

    except PlaywrightTimeoutError as e:
        raise PlaywrightTimeoutError(
            f"Failed to load page within {timeout}ms. "
            "Check your network connection and URL validity."
        ) from e
98
+
99
+
100
if __name__ == "__main__":
    # Ad-hoc manual check: pass a share URL as the first command-line
    # argument; without one the script does nothing.
    import sys

    cli_args = sys.argv[1:]
    if cli_args:
        url = cli_args[0]
        print(f"Extracting from: {url}")
        content = extract_from_url(url)
        print(f"Extracted {len(content)} characters")
        # Show only the first 500 characters as a preview.
        print(content[:500])
{aichat2md-1.0.1 → aichat2md-1.1.0}/aichat2md.egg-info/PKG-INFO +26 -2
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aichat2md
3
- Version: 1.0.1
3
+ Version: 1.1.0
4
4
  Summary: Convert AI chat conversations to structured Markdown
5
5
  Author: PlaceNameDay
6
6
  License: MIT
@@ -32,7 +32,7 @@ Convert AI chat conversations to structured Markdown documents.
32
32
 
33
33
  ## Features
34
34
 
35
- - 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
35
+ - 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
36
36
  - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
37
37
  - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
38
38
  - 🌍 **Bilingual support** - English/Chinese prompts
@@ -55,6 +55,30 @@ aichat2md https://chatgpt.com/share/xxx
55
55
  aichat2md ~/Downloads/chat.webarchive
56
56
  ```
57
57
 
58
+ ## Supported Platforms
59
+
60
+ - **ChatGPT** - chatgpt.com share links
61
+ - **Gemini** - gemini.google.com or g.co share links
62
+ - **Doubao (豆包)** - doubao.com share links
63
+ - **Webarchive** - Safari exported .webarchive files (any platform)
64
+
65
+ ### Usage Examples
66
+
67
+ ```bash
68
+ # ChatGPT
69
+ aichat2md https://chatgpt.com/share/xxx
70
+
71
+ # Gemini (supports both long and short URLs)
72
+ aichat2md https://gemini.google.com/share/xxx
73
+ aichat2md https://g.co/gemini/share/xxx
74
+
75
+ # Doubao
76
+ aichat2md https://www.doubao.com/thread/xxx
77
+
78
+ # Webarchive file
79
+ aichat2md ~/Downloads/conversation.webarchive
80
+ ```
81
+
58
82
  ## Supported AI Backends
59
83
 
60
84
  - **DeepSeek** (default) - Cost-effective, Chinese service
{aichat2md-1.0.1 → aichat2md-1.1.0}/pyproject.toml +1 -1
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "aichat2md"
7
- version = "1.0.1"
7
+ version = "1.1.0"
8
8
  description = "Convert AI chat conversations to structured Markdown"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
aichat2md-1.0.1/aichat2md/extractors/playwright_extractor.py +0 -58
@@ -1,58 +0,0 @@
1
- """Extract content from ChatGPT share URLs using Playwright."""
2
-
3
- from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
4
-
5
-
6
- def extract_from_url(url: str, timeout: int = 30000) -> str:
7
- """
8
- Extract text content from ChatGPT share URL.
9
-
10
- Args:
11
- url: ChatGPT share URL (e.g., https://chatgpt.com/share/...)
12
- timeout: Page load timeout in milliseconds
13
-
14
- Returns:
15
- Extracted plain text content
16
-
17
- Raises:
18
- PlaywrightTimeoutError: If page fails to load
19
- ValueError: If URL is invalid
20
- """
21
- if not url.startswith('http'):
22
- raise ValueError(f"Invalid URL: {url}")
23
-
24
- try:
25
- with sync_playwright() as p:
26
- browser = p.chromium.launch(headless=True)
27
- page = browser.new_page()
28
-
29
- # Navigate and wait for network idle
30
- page.goto(url, wait_until='networkidle', timeout=timeout)
31
-
32
- # Wait for conversation content to load
33
- # ChatGPT share pages typically have conversation in main content area
34
- page.wait_for_selector('main', timeout=10000)
35
-
36
- # Extract plain text from body
37
- content = page.inner_text('body')
38
-
39
- browser.close()
40
-
41
- return content.strip()
42
-
43
- except PlaywrightTimeoutError as e:
44
- raise PlaywrightTimeoutError(
45
- f"Failed to load page within {timeout}ms. "
46
- "Check your network connection and URL validity."
47
- ) from e
48
-
49
-
50
- if __name__ == "__main__":
51
- # Manual test
52
- import sys
53
- if len(sys.argv) > 1:
54
- url = sys.argv[1]
55
- print(f"Extracting from: {url}")
56
- content = extract_from_url(url)
57
- print(f"Extracted {len(content)} characters")
58
- print(content[:500])
File without changes
File without changes
File without changes
File without changes
File without changes