aichat2md 1.0.0__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {aichat2md-1.0.0 → aichat2md-1.1.0}/PKG-INFO +29 -5
  2. {aichat2md-1.0.0 → aichat2md-1.1.0}/README.md +25 -1
  3. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md/__init__.py +1 -1
  4. aichat2md-1.1.0/aichat2md/extractors/playwright_extractor.py +108 -0
  5. aichat2md-1.1.0/aichat2md/prompts/system_prompt_en.txt +50 -0
  6. aichat2md-1.1.0/aichat2md/prompts/system_prompt_zh.txt +50 -0
  7. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md.egg-info/PKG-INFO +29 -5
  8. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md.egg-info/SOURCES.txt +2 -0
  9. {aichat2md-1.0.0 → aichat2md-1.1.0}/pyproject.toml +7 -4
  10. aichat2md-1.0.0/aichat2md/extractors/playwright_extractor.py +0 -58
  11. {aichat2md-1.0.0 → aichat2md-1.1.0}/LICENSE +0 -0
  12. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md/cli.py +0 -0
  13. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md/config.py +0 -0
  14. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md/extractors/__init__.py +0 -0
  15. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md/extractors/webarchive_extractor.py +0 -0
  16. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md/prompts/__init__.py +0 -0
  17. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md/structurizer.py +0 -0
  18. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md.egg-info/dependency_links.txt +0 -0
  19. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md.egg-info/entry_points.txt +0 -0
  20. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md.egg-info/requires.txt +0 -0
  21. {aichat2md-1.0.0 → aichat2md-1.1.0}/aichat2md.egg-info/top_level.txt +0 -0
  22. {aichat2md-1.0.0 → aichat2md-1.1.0}/setup.cfg +0 -0
  23. {aichat2md-1.0.0 → aichat2md-1.1.0}/tests/test_cli.py +0 -0
  24. {aichat2md-1.0.0 → aichat2md-1.1.0}/tests/test_config.py +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aichat2md
3
- Version: 1.0.0
3
+ Version: 1.1.0
4
4
  Summary: Convert AI chat conversations to structured Markdown
5
5
  Author: PlaceNameDay
6
6
  License: MIT
7
- Project-URL: Homepage, https://github.com/yourusername/aichat2md
8
- Project-URL: Repository, https://github.com/yourusername/aichat2md
9
- Project-URL: Issues, https://github.com/yourusername/aichat2md/issues
7
+ Project-URL: Homepage, https://github.com/placenameday/aichat2md
8
+ Project-URL: Repository, https://github.com/placenameday/aichat2md
9
+ Project-URL: Issues, https://github.com/placenameday/aichat2md/issues
10
10
  Keywords: chatgpt,claude,markdown,ai,converter,deepseek
11
11
  Classifier: Development Status :: 4 - Beta
12
12
  Classifier: Intended Audience :: Developers
@@ -32,7 +32,7 @@ Convert AI chat conversations to structured Markdown documents.
32
32
 
33
33
  ## Features
34
34
 
35
- - 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
35
+ - 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
36
36
  - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
37
37
  - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
38
38
  - 🌍 **Bilingual support** - English/Chinese prompts
@@ -55,6 +55,30 @@ aichat2md https://chatgpt.com/share/xxx
55
55
  aichat2md ~/Downloads/chat.webarchive
56
56
  ```
57
57
 
58
+ ## Supported Platforms
59
+
60
+ - **ChatGPT** - chatgpt.com share links
61
+ - **Gemini** - gemini.google.com or g.co share links
62
+ - **Doubao (豆包)** - doubao.com share links
63
+ - **Webarchive** - Safari exported .webarchive files (any platform)
64
+
65
+ ### Usage Examples
66
+
67
+ ```bash
68
+ # ChatGPT
69
+ aichat2md https://chatgpt.com/share/xxx
70
+
71
+ # Gemini (supports both long and short URLs)
72
+ aichat2md https://gemini.google.com/share/xxx
73
+ aichat2md https://g.co/gemini/share/xxx
74
+
75
+ # Doubao
76
+ aichat2md https://www.doubao.com/thread/xxx
77
+
78
+ # Webarchive file
79
+ aichat2md ~/Downloads/conversation.webarchive
80
+ ```
81
+
58
82
  ## Supported AI Backends
59
83
 
60
84
  - **DeepSeek** (default) - Cost-effective, Chinese service
@@ -4,7 +4,7 @@ Convert AI chat conversations to structured Markdown documents.
4
4
 
5
5
  ## Features
6
6
 
7
- - 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
7
+ - 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
8
8
  - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
9
9
  - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
10
10
  - 🌍 **Bilingual support** - English/Chinese prompts
@@ -27,6 +27,30 @@ aichat2md https://chatgpt.com/share/xxx
27
27
  aichat2md ~/Downloads/chat.webarchive
28
28
  ```
29
29
 
30
+ ## Supported Platforms
31
+
32
+ - **ChatGPT** - chatgpt.com share links
33
+ - **Gemini** - gemini.google.com or g.co share links
34
+ - **Doubao (豆包)** - doubao.com share links
35
+ - **Webarchive** - Safari exported .webarchive files (any platform)
36
+
37
+ ### Usage Examples
38
+
39
+ ```bash
40
+ # ChatGPT
41
+ aichat2md https://chatgpt.com/share/xxx
42
+
43
+ # Gemini (supports both long and short URLs)
44
+ aichat2md https://gemini.google.com/share/xxx
45
+ aichat2md https://g.co/gemini/share/xxx
46
+
47
+ # Doubao
48
+ aichat2md https://www.doubao.com/thread/xxx
49
+
50
+ # Webarchive file
51
+ aichat2md ~/Downloads/conversation.webarchive
52
+ ```
53
+
30
54
  ## Supported AI Backends
31
55
 
32
56
  - **DeepSeek** (default) - Cost-effective, Chinese service
@@ -1,5 +1,5 @@
1
1
  """aichat2md - Convert AI chat conversations to structured Markdown."""
2
2
 
3
- __version__ = "1.0.0"
3
+ __version__ = "1.1.0"
4
4
  __author__ = "PlaceNameDay"
5
5
  __description__ = "Convert AI chat conversations to structured Markdown"
@@ -0,0 +1,108 @@
1
+ """Extract content from AI chat share URLs using Playwright."""
2
+
3
+ from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
4
+
5
+
6
+ def _detect_platform(url: str) -> str:
7
+ """
8
+ Detect platform from URL.
9
+
10
+ Args:
11
+ url: Share URL
12
+
13
+ Returns:
14
+ Platform name: 'doubao', 'gemini', or 'default'
15
+ """
16
+ url_lower = url.lower()
17
+ if 'doubao.com' in url_lower:
18
+ return 'doubao'
19
+ elif 'gemini.google.com' in url_lower or 'g.co' in url_lower:
20
+ return 'gemini'
21
+ else:
22
+ return 'default'
23
+
24
+
25
+ def _get_wait_time(platform: str) -> int:
26
+ """
27
+ Get wait time in milliseconds for platform.
28
+
29
+ Args:
30
+ platform: Platform name from _detect_platform
31
+
32
+ Returns:
33
+ Wait time in milliseconds
34
+ """
35
+ wait_times = {
36
+ 'doubao': 3000,
37
+ 'gemini': 5000,
38
+ 'default': 2000
39
+ }
40
+ return wait_times.get(platform, 2000)
41
+
42
+
43
+ def extract_from_url(url: str, timeout: int = 30000) -> str:
44
+ """
45
+ Extract text content from AI chat share URL.
46
+
47
+ Args:
48
+ url: Share URL (ChatGPT, Gemini, Doubao, etc.)
49
+ timeout: Page load timeout in milliseconds
50
+
51
+ Returns:
52
+ Extracted plain text content
53
+
54
+ Raises:
55
+ PlaywrightTimeoutError: If page fails to load
56
+ ValueError: If URL is invalid
57
+ """
58
+ if not url.startswith('http'):
59
+ raise ValueError(f"Invalid URL: {url}")
60
+
61
+ # Detect platform and get corresponding wait time
62
+ platform = _detect_platform(url)
63
+ wait_time = _get_wait_time(platform)
64
+
65
+ try:
66
+ with sync_playwright() as p:
67
+ browser = p.chromium.launch(headless=True)
68
+ page = browser.new_page()
69
+
70
+ # Navigate with appropriate wait strategy
71
+ # Use 'load' for Gemini/Doubao (networkidle may timeout due to ongoing requests)
72
+ wait_strategy = 'load' if platform in ['gemini', 'doubao'] else 'networkidle'
73
+ page.goto(url, wait_until=wait_strategy, timeout=timeout)
74
+
75
+ # Wait for content to load
76
+ # Try to wait for main selector (works for ChatGPT)
77
+ try:
78
+ page.wait_for_selector('main', timeout=10000)
79
+ except PlaywrightTimeoutError:
80
+ # Some platforms may not have 'main' element, continue anyway
81
+ pass
82
+
83
+ # Additional wait for dynamic content based on platform
84
+ page.wait_for_timeout(wait_time)
85
+
86
+ # Extract plain text from body
87
+ content = page.inner_text('body')
88
+
89
+ browser.close()
90
+
91
+ return content.strip()
92
+
93
+ except PlaywrightTimeoutError as e:
94
+ raise PlaywrightTimeoutError(
95
+ f"Failed to load page within {timeout}ms. "
96
+ "Check your network connection and URL validity."
97
+ ) from e
98
+
99
+
100
+ if __name__ == "__main__":
101
+ # Manual test
102
+ import sys
103
+ if len(sys.argv) > 1:
104
+ url = sys.argv[1]
105
+ print(f"Extracting from: {url}")
106
+ content = extract_from_url(url)
107
+ print(f"Extracted {len(content)} characters")
108
+ print(content[:500])
@@ -0,0 +1,50 @@
1
+ You are a professional knowledge document editor.
2
+
3
+ Input: Raw text from an AI chat conversation
4
+ Output: Structured Markdown document
5
+
6
+ Requirements:
7
+ 1. Identify the main topic and generate a concise document title
8
+ 2. Extract technical tags (e.g., [Python, API, Web]), limit to 3-5 tags
9
+ 3. Write a summary (2-3 sentences covering core content)
10
+ 4. Reorganize the conversation into knowledge sections with logical headings (## and ###)
11
+ 5. Filter out conversational filler ("OK", "let me think", "thanks", etc.)
12
+ 6. Extract code blocks and present them separately in "Code Examples" section
13
+ 7. Do NOT preserve chat turn format (User/Assistant) - rewrite as flowing explanatory content
14
+ 8. Identify key topics and list them as bullet points
15
+
16
+ Output format (strictly follow):
17
+ ---
18
+ tags: [tag1, tag2, tag3]
19
+ date: YYYY-MM-DD
20
+ source: [original URL or filename]
21
+ ---
22
+
23
+ # Document Title
24
+
25
+ ## Summary
26
+ [2-3 sentences covering core content]
27
+
28
+ ## Key Topics
29
+ - Topic 1
30
+ - Topic 2
31
+ - Topic 3
32
+
33
+ ## [Knowledge Section Title 1]
34
+ [Reorganized explanatory content, not conversational format]
35
+
36
+ ### [Subsection Title]
37
+ [Detailed content]
38
+
39
+ ## [Knowledge Section Title 2]
40
+ [Content...]
41
+
42
+ ## Code Examples
43
+ ```language
44
+ # Complete code with explanatory comments
45
+ ```
46
+
47
+ Notes:
48
+ - Titles should be specific and informative, avoid generic titles like "Introduction", "Overview"
49
+ - Content should flow smoothly, like a tutorial or documentation rather than chat logs
50
+ - Code examples should be complete and runnable, with necessary comments
@@ -0,0 +1,50 @@
1
+ 你是一个专业的知识文档编辑器。
2
+
3
+ 输入:ChatGPT 对话的原始文本
4
+ 输出:结构化的 Markdown 文档
5
+
6
+ 要求:
7
+ 1. 识别对话主题,生成简洁的文档标题
8
+ 2. 提取技术标签(如 [Python, API, Web]),限制在 3-5 个标签
9
+ 3. 写一段摘要(2-3 句话概括核心内容)
10
+ 4. 将对话重组为知识章节,使用合理的标题层级(## 和 ###)
11
+ 5. 过滤无用的对话废话("好的"、"让我想想"、"谢谢"等)
12
+ 6. 提取代码块单独呈现在"代码示例"章节
13
+ 7. 不要保留对话轮次格式(User/Assistant),重组为流畅的说明性内容
14
+ 8. 识别关键主题,列为项目符号列表
15
+
16
+ 输出格式(严格遵循):
17
+ ---
18
+ 技术标签: [标签1, 标签2, 标签3]
19
+ 日期: YYYY-MM-DD
20
+ 来源: [原始URL或文件名]
21
+ ---
22
+
23
+ # 文档标题
24
+
25
+ ## 摘要
26
+ [2-3 句话概括核心内容]
27
+
28
+ ## 关键主题
29
+ - 主题 1
30
+ - 主题 2
31
+ - 主题 3
32
+
33
+ ## [知识点章节标题 1]
34
+ [重组后的说明性内容,非对话格式]
35
+
36
+ ### [子章节标题]
37
+ [详细内容]
38
+
39
+ ## [知识点章节标题 2]
40
+ [内容...]
41
+
42
+ ## 代码示例
43
+ ```语言
44
+ # 提取的完整代码,带注释说明
45
+ ```
46
+
47
+ 注意:
48
+ - 标题要具体且有信息量,避免"介绍"、"概述"等泛泛标题
49
+ - 内容要连贯流畅,像教程或文档而非聊天记录
50
+ - 代码示例要完整可运行,添加必要注释
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aichat2md
3
- Version: 1.0.0
3
+ Version: 1.1.0
4
4
  Summary: Convert AI chat conversations to structured Markdown
5
5
  Author: PlaceNameDay
6
6
  License: MIT
7
- Project-URL: Homepage, https://github.com/yourusername/aichat2md
8
- Project-URL: Repository, https://github.com/yourusername/aichat2md
9
- Project-URL: Issues, https://github.com/yourusername/aichat2md/issues
7
+ Project-URL: Homepage, https://github.com/placenameday/aichat2md
8
+ Project-URL: Repository, https://github.com/placenameday/aichat2md
9
+ Project-URL: Issues, https://github.com/placenameday/aichat2md/issues
10
10
  Keywords: chatgpt,claude,markdown,ai,converter,deepseek
11
11
  Classifier: Development Status :: 4 - Beta
12
12
  Classifier: Intended Audience :: Developers
@@ -32,7 +32,7 @@ Convert AI chat conversations to structured Markdown documents.
32
32
 
33
33
  ## Features
34
34
 
35
- - 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
35
+ - 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
36
36
  - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
37
37
  - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
38
38
  - 🌍 **Bilingual support** - English/Chinese prompts
@@ -55,6 +55,30 @@ aichat2md https://chatgpt.com/share/xxx
55
55
  aichat2md ~/Downloads/chat.webarchive
56
56
  ```
57
57
 
58
+ ## Supported Platforms
59
+
60
+ - **ChatGPT** - chatgpt.com share links
61
+ - **Gemini** - gemini.google.com or g.co share links
62
+ - **Doubao (豆包)** - doubao.com share links
63
+ - **Webarchive** - Safari exported .webarchive files (any platform)
64
+
65
+ ### Usage Examples
66
+
67
+ ```bash
68
+ # ChatGPT
69
+ aichat2md https://chatgpt.com/share/xxx
70
+
71
+ # Gemini (supports both long and short URLs)
72
+ aichat2md https://gemini.google.com/share/xxx
73
+ aichat2md https://g.co/gemini/share/xxx
74
+
75
+ # Doubao
76
+ aichat2md https://www.doubao.com/thread/xxx
77
+
78
+ # Webarchive file
79
+ aichat2md ~/Downloads/conversation.webarchive
80
+ ```
81
+
58
82
  ## Supported AI Backends
59
83
 
60
84
  - **DeepSeek** (default) - Cost-effective, Chinese service
@@ -15,5 +15,7 @@ aichat2md/extractors/__init__.py
15
15
  aichat2md/extractors/playwright_extractor.py
16
16
  aichat2md/extractors/webarchive_extractor.py
17
17
  aichat2md/prompts/__init__.py
18
+ aichat2md/prompts/system_prompt_en.txt
19
+ aichat2md/prompts/system_prompt_zh.txt
18
20
  tests/test_cli.py
19
21
  tests/test_config.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "aichat2md"
7
- version = "1.0.0"
7
+ version = "1.1.0"
8
8
  description = "Convert AI chat conversations to structured Markdown"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -34,6 +34,9 @@ dependencies = [
34
34
  aichat2md = "aichat2md.cli:main"
35
35
 
36
36
  [project.urls]
37
- Homepage = "https://github.com/yourusername/aichat2md"
38
- Repository = "https://github.com/yourusername/aichat2md"
39
- Issues = "https://github.com/yourusername/aichat2md/issues"
37
+ Homepage = "https://github.com/placenameday/aichat2md"
38
+ Repository = "https://github.com/placenameday/aichat2md"
39
+ Issues = "https://github.com/placenameday/aichat2md/issues"
40
+
41
+ [tool.setuptools.package-data]
42
+ aichat2md = ["prompts/*.txt"]
@@ -1,58 +0,0 @@
1
- """Extract content from ChatGPT share URLs using Playwright."""
2
-
3
- from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
4
-
5
-
6
- def extract_from_url(url: str, timeout: int = 30000) -> str:
7
- """
8
- Extract text content from ChatGPT share URL.
9
-
10
- Args:
11
- url: ChatGPT share URL (e.g., https://chatgpt.com/share/...)
12
- timeout: Page load timeout in milliseconds
13
-
14
- Returns:
15
- Extracted plain text content
16
-
17
- Raises:
18
- PlaywrightTimeoutError: If page fails to load
19
- ValueError: If URL is invalid
20
- """
21
- if not url.startswith('http'):
22
- raise ValueError(f"Invalid URL: {url}")
23
-
24
- try:
25
- with sync_playwright() as p:
26
- browser = p.chromium.launch(headless=True)
27
- page = browser.new_page()
28
-
29
- # Navigate and wait for network idle
30
- page.goto(url, wait_until='networkidle', timeout=timeout)
31
-
32
- # Wait for conversation content to load
33
- # ChatGPT share pages typically have conversation in main content area
34
- page.wait_for_selector('main', timeout=10000)
35
-
36
- # Extract plain text from body
37
- content = page.inner_text('body')
38
-
39
- browser.close()
40
-
41
- return content.strip()
42
-
43
- except PlaywrightTimeoutError as e:
44
- raise PlaywrightTimeoutError(
45
- f"Failed to load page within {timeout}ms. "
46
- "Check your network connection and URL validity."
47
- ) from e
48
-
49
-
50
- if __name__ == "__main__":
51
- # Manual test
52
- import sys
53
- if len(sys.argv) > 1:
54
- url = sys.argv[1]
55
- print(f"Extracting from: {url}")
56
- content = extract_from_url(url)
57
- print(f"Extracted {len(content)} characters")
58
- print(content[:500])
File without changes
File without changes
File without changes
File without changes
File without changes