aichat2md 1.0.1__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. {aichat2md-1.0.1 → aichat2md-1.2.0}/PKG-INFO +27 -2
  2. {aichat2md-1.0.1 → aichat2md-1.2.0}/README.md +25 -1
  3. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/__init__.py +1 -1
  4. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/cli.py +23 -5
  5. aichat2md-1.2.0/aichat2md/extractors/playwright_extractor.py +108 -0
  6. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/structurizer.py +3 -1
  7. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/PKG-INFO +27 -2
  8. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/requires.txt +1 -0
  9. {aichat2md-1.0.1 → aichat2md-1.2.0}/pyproject.toml +2 -1
  10. aichat2md-1.0.1/aichat2md/extractors/playwright_extractor.py +0 -58
  11. {aichat2md-1.0.1 → aichat2md-1.2.0}/LICENSE +0 -0
  12. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/config.py +0 -0
  13. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/extractors/__init__.py +0 -0
  14. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/extractors/webarchive_extractor.py +0 -0
  15. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/prompts/__init__.py +0 -0
  16. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/prompts/system_prompt_en.txt +0 -0
  17. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/prompts/system_prompt_zh.txt +0 -0
  18. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/SOURCES.txt +0 -0
  19. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/dependency_links.txt +0 -0
  20. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/entry_points.txt +0 -0
  21. {aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/top_level.txt +0 -0
  22. {aichat2md-1.0.1 → aichat2md-1.2.0}/setup.cfg +0 -0
  23. {aichat2md-1.0.1 → aichat2md-1.2.0}/tests/test_cli.py +0 -0
  24. {aichat2md-1.0.1 → aichat2md-1.2.0}/tests/test_config.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aichat2md
3
- Version: 1.0.1
3
+ Version: 1.2.0
4
4
  Summary: Convert AI chat conversations to structured Markdown
5
5
  Author: PlaceNameDay
6
6
  License: MIT
@@ -24,6 +24,7 @@ Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: playwright>=1.40.0
26
26
  Requires-Dist: requests>=2.31.0
27
+ Requires-Dist: yaspin>=3.0.0
27
28
  Dynamic: license-file
28
29
 
29
30
  # aichat2md
@@ -32,7 +33,7 @@ Convert AI chat conversations to structured Markdown documents.
32
33
 
33
34
  ## Features
34
35
 
35
- - 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
36
+ - 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
36
37
  - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
37
38
  - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
38
39
  - 🌍 **Bilingual support** - English/Chinese prompts
@@ -55,6 +56,30 @@ aichat2md https://chatgpt.com/share/xxx
55
56
  aichat2md ~/Downloads/chat.webarchive
56
57
  ```
57
58
 
59
+ ## Supported Platforms
60
+
61
+ - **ChatGPT** - chatgpt.com share links
62
+ - **Gemini** - gemini.google.com or g.co share links
63
+ - **Doubao (豆包)** - doubao.com share links
64
+ - **Webarchive** - Safari exported .webarchive files (any platform)
65
+
66
+ ### Usage Examples
67
+
68
+ ```bash
69
+ # ChatGPT
70
+ aichat2md https://chatgpt.com/share/xxx
71
+
72
+ # Gemini (supports both long and short URLs)
73
+ aichat2md https://gemini.google.com/share/xxx
74
+ aichat2md https://g.co/gemini/share/xxx
75
+
76
+ # Doubao
77
+ aichat2md https://www.doubao.com/thread/xxx
78
+
79
+ # Webarchive file
80
+ aichat2md ~/Downloads/conversation.webarchive
81
+ ```
82
+
58
83
  ## Supported AI Backends
59
84
 
60
85
  - **DeepSeek** (default) - Cost-effective, Chinese service
@@ -4,7 +4,7 @@ Convert AI chat conversations to structured Markdown documents.
4
4
 
5
5
  ## Features
6
6
 
7
- - 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
7
+ - 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
8
8
  - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
9
9
  - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
10
10
  - 🌍 **Bilingual support** - English/Chinese prompts
@@ -27,6 +27,30 @@ aichat2md https://chatgpt.com/share/xxx
27
27
  aichat2md ~/Downloads/chat.webarchive
28
28
  ```
29
29
 
30
+ ## Supported Platforms
31
+
32
+ - **ChatGPT** - chatgpt.com share links
33
+ - **Gemini** - gemini.google.com or g.co share links
34
+ - **Doubao (豆包)** - doubao.com share links
35
+ - **Webarchive** - Safari exported .webarchive files (any platform)
36
+
37
+ ### Usage Examples
38
+
39
+ ```bash
40
+ # ChatGPT
41
+ aichat2md https://chatgpt.com/share/xxx
42
+
43
+ # Gemini (supports both long and short URLs)
44
+ aichat2md https://gemini.google.com/share/xxx
45
+ aichat2md https://g.co/gemini/share/xxx
46
+
47
+ # Doubao
48
+ aichat2md https://www.doubao.com/thread/xxx
49
+
50
+ # Webarchive file
51
+ aichat2md ~/Downloads/conversation.webarchive
52
+ ```
53
+
30
54
  ## Supported AI Backends
31
55
 
32
56
  - **DeepSeek** (default) - Cost-effective, Chinese service
@@ -1,5 +1,5 @@
1
1
  """aichat2md - Convert AI chat conversations to structured Markdown."""
2
2
 
3
- __version__ = "1.0.1"
3
+ __version__ = "1.2.0"
4
4
  __author__ = "PlaceNameDay"
5
5
  __description__ = "Convert AI chat conversations to structured Markdown"
@@ -15,6 +15,9 @@ import sys
15
15
  from pathlib import Path
16
16
  from datetime import datetime
17
17
  from typing import Tuple
18
+ import time
19
+
20
+ from yaspin import yaspin
18
21
 
19
22
  from .config import setup_config, load_config
20
23
  from .extractors.playwright_extractor import extract_from_url
@@ -23,6 +26,17 @@ from .structurizer import structurize_content
23
26
  from . import __version__
24
27
 
25
28
 
29
class TimedText:
    """Dynamic text with elapsed time in seconds.

    Every ``str()`` conversion recomputes the time elapsed since the object
    was created, so anything that re-renders the text repeatedly shows a
    live ``[Ns]`` counter in front of the message.

    Args:
        text: Static message shown after the elapsed-time prefix.
    """

    def __init__(self, text: str):
        self.text = text
        # Use the monotonic clock: an elapsed-time measurement must not jump
        # (or go negative) when the system wall clock is adjusted.
        self._start = time.monotonic()

    def __str__(self) -> str:
        elapsed = int(time.monotonic() - self._start)
        return f"[{elapsed}s] {self.text}"
38
+
39
+
26
40
  def sanitize_filename(title: str, max_length: int = 50) -> str:
27
41
  """
28
42
  Sanitize title for use as filename.
@@ -87,15 +101,17 @@ def extract_content(input_path: str) -> Tuple[str, str]:
87
101
  Tuple of (extracted_text, source_identifier)
88
102
  """
89
103
  if input_path.startswith('http'):
90
- print(f"📡 Extracting from URL: {input_path}")
91
- text = extract_from_url(input_path)
104
+ with yaspin(text=TimedText(f"Extracting from URL (up to 60s): {input_path}")) as sp:
105
+ text = extract_from_url(input_path)
106
+ sp.ok(f"✓ Extracted {len(text)} characters")
92
107
  source = input_path
93
108
  else:
109
+ # Webarchive extraction is fast, no spinner needed
94
110
  print(f"📄 Extracting from webarchive: {input_path}")
95
111
  text = extract_from_webarchive(input_path)
112
+ print(f"✓ Extracted {len(text)} characters")
96
113
  source = Path(input_path).name
97
114
 
98
- print(f"✓ Extracted {len(text)} characters")
99
115
  return text, source
100
116
 
101
117
 
@@ -221,8 +237,10 @@ Examples:
221
237
 
222
238
  # Structurize with AI
223
239
  provider = config.get("api_base_url", "API")
224
- print(f"🤖 Structurizing with {provider}...")
225
- markdown = structurize_content(raw_text, config, source)
240
+ estimated = min(60 + len(raw_text) // 100, 600)
241
+ with yaspin(text=TimedText(f"Structurizing {len(raw_text)} chars with {provider} (~{estimated}s)")) as sp:
242
+ markdown = structurize_content(raw_text, config, source)
243
+ sp.ok("✓ Structurized")
226
244
 
227
245
  # Determine output path
228
246
  output_path = determine_output_path(args.input, markdown, config, args.output)
@@ -0,0 +1,108 @@
1
+ """Extract content from AI chat share URLs using Playwright."""
2
+
3
+ from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
4
+
5
+
6
+ def _detect_platform(url: str) -> str:
7
+ """
8
+ Detect platform from URL.
9
+
10
+ Args:
11
+ url: Share URL
12
+
13
+ Returns:
14
+ Platform name: 'doubao', 'gemini', or 'default'
15
+ """
16
+ url_lower = url.lower()
17
+ if 'doubao.com' in url_lower:
18
+ return 'doubao'
19
+ elif 'gemini.google.com' in url_lower or 'g.co' in url_lower:
20
+ return 'gemini'
21
+ else:
22
+ return 'default'
23
+
24
+
25
+ def _get_wait_time(platform: str) -> int:
26
+ """
27
+ Get wait time in milliseconds for platform.
28
+
29
+ Args:
30
+ platform: Platform name from _detect_platform
31
+
32
+ Returns:
33
+ Wait time in milliseconds
34
+ """
35
+ wait_times = {
36
+ 'doubao': 3000,
37
+ 'gemini': 5000,
38
+ 'default': 2000
39
+ }
40
+ return wait_times.get(platform, 2000)
41
+
42
+
43
def extract_from_url(url: str, timeout: int = 60000) -> str:
    """
    Extract text content from AI chat share URL.

    Args:
        url: Share URL (ChatGPT, Gemini, Doubao, etc.)
        timeout: Page load timeout in milliseconds, applied to the navigation

    Returns:
        Extracted plain text content (inner text of <body>, stripped)

    Raises:
        PlaywrightTimeoutError: If page fails to load
        ValueError: If URL is invalid
    """
    if not url.startswith('http'):
        raise ValueError(f"Invalid URL: {url}")

    # Detect platform and get corresponding wait time
    platform = _detect_platform(url)
    wait_time = _get_wait_time(platform)

    # Use 'load' for Gemini/Doubao (networkidle may timeout due to ongoing requests)
    wait_strategy = 'load' if platform in ('gemini', 'doubao') else 'networkidle'

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                # Honour the caller's timeout so the limit actually enforced
                # matches the one reported in the error message below.
                page.goto(url, wait_until=wait_strategy, timeout=timeout)

                # Try to wait for main selector (works for ChatGPT)
                try:
                    page.wait_for_selector('main', timeout=10000)
                except PlaywrightTimeoutError:
                    # Some platforms may not have 'main' element, continue anyway
                    pass

                # Additional wait for dynamic content based on platform
                page.wait_for_timeout(wait_time)

                # Extract plain text from body
                content = page.inner_text('body')
            finally:
                # Close the browser even when navigation or extraction fails.
                browser.close()

        return content.strip()

    except PlaywrightTimeoutError as e:
        raise PlaywrightTimeoutError(
            f"Failed to load page within {timeout}ms. "
            "Check your network connection and URL validity."
        ) from e
98
+
99
+
100
if __name__ == "__main__":
    # Manual smoke test: pass a share URL as the first command-line argument.
    import sys

    cli_args = sys.argv[1:]
    if cli_args:
        url = cli_args[0]
        print(f"Extracting from: {url}")
        content = extract_from_url(url)
        print(f"Extracted {len(content)} characters")
        print(content[:500])
@@ -81,7 +81,9 @@ def structurize_content(
81
81
  }
82
82
 
83
83
  try:
84
- response = requests.post(api_url, headers=headers, json=payload, timeout=60)
84
+ # Dynamic timeout based on content size: 60s base + 1s per 100 chars, max 600s
85
+ estimated_timeout = min(60 + len(raw_text) // 100, 600)
86
+ response = requests.post(api_url, headers=headers, json=payload, timeout=estimated_timeout)
85
87
  response.raise_for_status()
86
88
 
87
89
  result = response.json()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aichat2md
3
- Version: 1.0.1
3
+ Version: 1.2.0
4
4
  Summary: Convert AI chat conversations to structured Markdown
5
5
  Author: PlaceNameDay
6
6
  License: MIT
@@ -24,6 +24,7 @@ Description-Content-Type: text/markdown
24
24
  License-File: LICENSE
25
25
  Requires-Dist: playwright>=1.40.0
26
26
  Requires-Dist: requests>=2.31.0
27
+ Requires-Dist: yaspin>=3.0.0
27
28
  Dynamic: license-file
28
29
 
29
30
  # aichat2md
@@ -32,7 +33,7 @@ Convert AI chat conversations to structured Markdown documents.
32
33
 
33
34
  ## Features
34
35
 
35
- - 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
36
+ - 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
36
37
  - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
37
38
  - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
38
39
  - 🌍 **Bilingual support** - English/Chinese prompts
@@ -55,6 +56,30 @@ aichat2md https://chatgpt.com/share/xxx
55
56
  aichat2md ~/Downloads/chat.webarchive
56
57
  ```
57
58
 
59
+ ## Supported Platforms
60
+
61
+ - **ChatGPT** - chatgpt.com share links
62
+ - **Gemini** - gemini.google.com or g.co share links
63
+ - **Doubao (豆包)** - doubao.com share links
64
+ - **Webarchive** - Safari exported .webarchive files (any platform)
65
+
66
+ ### Usage Examples
67
+
68
+ ```bash
69
+ # ChatGPT
70
+ aichat2md https://chatgpt.com/share/xxx
71
+
72
+ # Gemini (supports both long and short URLs)
73
+ aichat2md https://gemini.google.com/share/xxx
74
+ aichat2md https://g.co/gemini/share/xxx
75
+
76
+ # Doubao
77
+ aichat2md https://www.doubao.com/thread/xxx
78
+
79
+ # Webarchive file
80
+ aichat2md ~/Downloads/conversation.webarchive
81
+ ```
82
+
58
83
  ## Supported AI Backends
59
84
 
60
85
  - **DeepSeek** (default) - Cost-effective, Chinese service
@@ -1,2 +1,3 @@
1
1
  playwright>=1.40.0
2
2
  requests>=2.31.0
3
+ yaspin>=3.0.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "aichat2md"
7
- version = "1.0.1"
7
+ version = "1.2.0"
8
8
  description = "Convert AI chat conversations to structured Markdown"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.8"
@@ -28,6 +28,7 @@ classifiers = [
28
28
  dependencies = [
29
29
  "playwright>=1.40.0",
30
30
  "requests>=2.31.0",
31
+ "yaspin>=3.0.0",
31
32
  ]
32
33
 
33
34
  [project.scripts]
@@ -1,58 +0,0 @@
1
- """Extract content from ChatGPT share URLs using Playwright."""
2
-
3
- from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
4
-
5
-
6
- def extract_from_url(url: str, timeout: int = 30000) -> str:
7
- """
8
- Extract text content from ChatGPT share URL.
9
-
10
- Args:
11
- url: ChatGPT share URL (e.g., https://chatgpt.com/share/...)
12
- timeout: Page load timeout in milliseconds
13
-
14
- Returns:
15
- Extracted plain text content
16
-
17
- Raises:
18
- PlaywrightTimeoutError: If page fails to load
19
- ValueError: If URL is invalid
20
- """
21
- if not url.startswith('http'):
22
- raise ValueError(f"Invalid URL: {url}")
23
-
24
- try:
25
- with sync_playwright() as p:
26
- browser = p.chromium.launch(headless=True)
27
- page = browser.new_page()
28
-
29
- # Navigate and wait for network idle
30
- page.goto(url, wait_until='networkidle', timeout=timeout)
31
-
32
- # Wait for conversation content to load
33
- # ChatGPT share pages typically have conversation in main content area
34
- page.wait_for_selector('main', timeout=10000)
35
-
36
- # Extract plain text from body
37
- content = page.inner_text('body')
38
-
39
- browser.close()
40
-
41
- return content.strip()
42
-
43
- except PlaywrightTimeoutError as e:
44
- raise PlaywrightTimeoutError(
45
- f"Failed to load page within {timeout}ms. "
46
- "Check your network connection and URL validity."
47
- ) from e
48
-
49
-
50
- if __name__ == "__main__":
51
- # Manual test
52
- import sys
53
- if len(sys.argv) > 1:
54
- url = sys.argv[1]
55
- print(f"Extracting from: {url}")
56
- content = extract_from_url(url)
57
- print(f"Extracted {len(content)} characters")
58
- print(content[:500])
File without changes
File without changes
File without changes
File without changes