PyPI - aichat2md - Versions diffs - 1.0.1__tar.gz → 1.2.0__tar.gz - Mend

aichat2md 1.0.1__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

{aichat2md-1.0.1 → aichat2md-1.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aichat2md
-Version: 1.0.1
+Version: 1.2.0
 Summary: Convert AI chat conversations to structured Markdown
 Author: PlaceNameDay
 License: MIT
@@ -24,6 +24,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: playwright>=1.40.0
 Requires-Dist: requests>=2.31.0
+Requires-Dist: yaspin>=3.0.0
 Dynamic: license-file
 # aichat2md
@@ -32,7 +33,7 @@ Convert AI chat conversations to structured Markdown documents.
 ## Features
-- 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
+- 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
 - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
 - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
 - 🌍 **Bilingual support** - English/Chinese prompts
@@ -55,6 +56,30 @@ aichat2md https://chatgpt.com/share/xxx
 aichat2md ~/Downloads/chat.webarchive
 ```
+## Supported Platforms
+- **ChatGPT** - chatgpt.com share links
+- **Gemini** - gemini.google.com or g.co share links
+- **Doubao (豆包)** - doubao.com share links
+- **Webarchive** - Safari exported .webarchive files (any platform)
+### Usage Examples
+```bash
+# ChatGPT
+aichat2md https://chatgpt.com/share/xxx
+# Gemini (supports both long and short URLs)
+aichat2md https://gemini.google.com/share/xxx
+aichat2md https://g.co/gemini/share/xxx
+# Doubao
+aichat2md https://www.doubao.com/thread/xxx
+# Webarchive file
+aichat2md ~/Downloads/conversation.webarchive
+```
 ## Supported AI Backends
 - **DeepSeek** (default) - Cost-effective, Chinese service

{aichat2md-1.0.1 → aichat2md-1.2.0}/README.md RENAMED Viewed

@@ -4,7 +4,7 @@ Convert AI chat conversations to structured Markdown documents.
 ## Features
-- 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
+- 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
 - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
 - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
 - 🌍 **Bilingual support** - English/Chinese prompts
@@ -27,6 +27,30 @@ aichat2md https://chatgpt.com/share/xxx
 aichat2md ~/Downloads/chat.webarchive
 ```
+## Supported Platforms
+- **ChatGPT** - chatgpt.com share links
+- **Gemini** - gemini.google.com or g.co share links
+- **Doubao (豆包)** - doubao.com share links
+- **Webarchive** - Safari exported .webarchive files (any platform)
+### Usage Examples
+```bash
+# ChatGPT
+aichat2md https://chatgpt.com/share/xxx
+# Gemini (supports both long and short URLs)
+aichat2md https://gemini.google.com/share/xxx
+aichat2md https://g.co/gemini/share/xxx
+# Doubao
+aichat2md https://www.doubao.com/thread/xxx
+# Webarchive file
+aichat2md ~/Downloads/conversation.webarchive
+```
 ## Supported AI Backends
 - **DeepSeek** (default) - Cost-effective, Chinese service

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/__init__.py RENAMED Viewed

@@ -1,5 +1,5 @@
 """aichat2md - Convert AI chat conversations to structured Markdown."""
-__version__ = "1.0.1"
+__version__ = "1.2.0"
 __author__ = "PlaceNameDay"
 __description__ = "Convert AI chat conversations to structured Markdown"

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/cli.py RENAMED Viewed

@@ -15,6 +15,9 @@ import sys
 from pathlib import Path
 from datetime import datetime
 from typing import Tuple
+import time
+from yaspin import yaspin
 from .config import setup_config, load_config
 from .extractors.playwright_extractor import extract_from_url
@@ -23,6 +26,17 @@ from .structurizer import structurize_content
 from . import __version__
+class TimedText:
+    """Spinner label that reports how long the current step has been running.
+    Instances are passed as the ``text`` of a ``yaspin`` spinner in place of a
+    plain string; every ``str()`` conversion yields the label prefixed with the
+    whole seconds elapsed since the object was created.
+    """
+    def __init__(self, text: str):
+        self.text = text
+        # Monotonic clock: an elapsed-time display must not jump (or go
+        # negative) when the system wall clock is adjusted mid-run.
+        self._start = time.monotonic()
+    def __str__(self) -> str:
+        elapsed = int(time.monotonic() - self._start)
+        return f"[{elapsed}s] {self.text}"
 def sanitize_filename(title: str, max_length: int = 50) -> str:
     """
     Sanitize title for use as filename.
@@ -87,15 +101,17 @@ def extract_content(input_path: str) -> Tuple[str, str]:
         Tuple of (extracted_text, source_identifier)
     """
     if input_path.startswith('http'):
-        print(f"📡 Extracting from URL: {input_path}")
-        text = extract_from_url(input_path)
+        with yaspin(text=TimedText(f"Extracting from URL (up to 60s): {input_path}")) as sp:
+            text = extract_from_url(input_path)
+            sp.ok(f"✓ Extracted {len(text)} characters")
         source = input_path
     else:
+        # Webarchive extraction is fast, no spinner needed
         print(f"📄 Extracting from webarchive: {input_path}")
         text = extract_from_webarchive(input_path)
+        print(f"✓ Extracted {len(text)} characters")
         source = Path(input_path).name
-    print(f"✓ Extracted {len(text)} characters")
     return text, source
@@ -221,8 +237,10 @@ Examples:
         # Structurize with AI
         provider = config.get("api_base_url", "API")
-        print(f"🤖 Structurizing with {provider}...")
-        markdown = structurize_content(raw_text, config, source)
+        estimated = min(60 + len(raw_text) // 100, 600)
+        with yaspin(text=TimedText(f"Structurizing {len(raw_text)} chars with {provider} (~{estimated}s)")) as sp:
+            markdown = structurize_content(raw_text, config, source)
+            sp.ok("✓ Structurized")
         # Determine output path
         output_path = determine_output_path(args.input, markdown, config, args.output)

aichat2md-1.2.0/aichat2md/extractors/playwright_extractor.py ADDED Viewed

@@ -0,0 +1,108 @@
+"""Extract content from AI chat share URLs using Playwright."""
+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
+def _detect_platform(url: str) -> str:
+    """
+    Detect platform from the host name of a share URL.
+    The URL is parsed and only its host is compared (exact domain or any
+    subdomain of it). Plain substring checks on the whole URL would also
+    match unrelated addresses such as "https://blog.com/..." (contains
+    "g.co") or a platform name that merely appears in a path or query.
+    Args:
+        url: Share URL
+    Returns:
+        Platform name: 'doubao', 'gemini', or 'default'
+    """
+    from urllib.parse import urlsplit
+    try:
+        # hostname is already lower-cased by urlsplit; drop a trailing root dot
+        host = (urlsplit(url).hostname or '').rstrip('.')
+    except ValueError:
+        # Malformed authority (e.g. unbalanced IPv6 brackets): unknown platform
+        return 'default'
+    def _on_domain(domain: str) -> bool:
+        # True for the domain itself and for any of its subdomains
+        return host == domain or host.endswith('.' + domain)
+    if _on_domain('doubao.com'):
+        return 'doubao'
+    if _on_domain('gemini.google.com') or _on_domain('g.co'):
+        return 'gemini'
+    return 'default'
+def _get_wait_time(platform: str) -> int:
+    """
+    Look up the extra settle delay used after page load for a platform.
+    Args:
+        platform: Platform name from _detect_platform
+    Returns:
+        Wait time in milliseconds (2000 for 'default' and unknown names)
+    """
+    fallback_ms = 2000
+    # Only platforms that differ from the fallback need an explicit entry
+    slower_platforms_ms = {'doubao': 3000, 'gemini': 5000}
+    return slower_platforms_ms.get(platform, fallback_ms)
+def extract_from_url(url: str, timeout: int = 60000) -> str:
+    """
+    Extract text content from AI chat share URL.
+    Args:
+        url: Share URL (ChatGPT, Gemini, Doubao, etc.)
+        timeout: Page load (navigation) timeout in milliseconds
+    Returns:
+        Extracted plain text content
+    Raises:
+        PlaywrightTimeoutError: If page fails to load
+        ValueError: If URL is invalid
+    """
+    if not url.startswith('http'):
+        raise ValueError(f"Invalid URL: {url}")
+    # Detect platform and get corresponding wait time
+    platform = _detect_platform(url)
+    wait_time = _get_wait_time(platform)
+    try:
+        with sync_playwright() as p:
+            browser = p.chromium.launch(headless=True)
+            try:
+                page = browser.new_page()
+                # Navigate with appropriate wait strategy
+                # Use 'load' for Gemini/Doubao (networkidle may timeout due to ongoing requests)
+                wait_strategy = 'load' if platform in ['gemini', 'doubao'] else 'networkidle'
+                # Honor the caller's timeout so it matches the value reported in the
+                # error message below (it was previously hard-coded to 60000)
+                page.goto(url, wait_until=wait_strategy, timeout=timeout)
+                # Wait for content to load
+                # Try to wait for main selector (works for ChatGPT)
+                try:
+                    page.wait_for_selector('main', timeout=10000)
+                except PlaywrightTimeoutError:
+                    # Some platforms may not have 'main' element, continue anyway
+                    pass
+                # Additional wait for dynamic content based on platform
+                page.wait_for_timeout(wait_time)
+                # Extract plain text from body
+                content = page.inner_text('body')
+            finally:
+                # Close the browser on failure paths too, not only on success
+                browser.close()
+            return content.strip()
+    except PlaywrightTimeoutError as e:
+        raise PlaywrightTimeoutError(
+            f"Failed to load page within {timeout}ms. "
+            "Check your network connection and URL validity."
+        ) from e
+if __name__ == "__main__":
+    # Manual smoke test: the first command-line argument is the share URL
+    import sys
+    cli_args = sys.argv[1:]
+    if cli_args:
+        target = cli_args[0]
+        print(f"Extracting from: {target}")
+        extracted = extract_from_url(target)
+        print(f"Extracted {len(extracted)} characters")
+        # Preview only the beginning of the text
+        print(extracted[:500])

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/structurizer.py RENAMED Viewed

@@ -81,7 +81,9 @@ def structurize_content(
     }
     try:
-        response = requests.post(api_url, headers=headers, json=payload, timeout=60)
+        # Dynamic timeout based on content size: 60s base + 1s per 100 chars, max 600s
+        estimated_timeout = min(60 + len(raw_text) // 100, 600)
+        response = requests.post(api_url, headers=headers, json=payload, timeout=estimated_timeout)
         response.raise_for_status()
         result = response.json()

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aichat2md
-Version: 1.0.1
+Version: 1.2.0
 Summary: Convert AI chat conversations to structured Markdown
 Author: PlaceNameDay
 License: MIT
@@ -24,6 +24,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: playwright>=1.40.0
 Requires-Dist: requests>=2.31.0
+Requires-Dist: yaspin>=3.0.0
 Dynamic: license-file
 # aichat2md
@@ -32,7 +33,7 @@ Convert AI chat conversations to structured Markdown documents.
 ## Features
-- 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
+- 🌐 **Extract from URLs** - ChatGPT, Gemini, Doubao share links (with JS rendering via Playwright)
 - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
 - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
 - 🌍 **Bilingual support** - English/Chinese prompts
@@ -55,6 +56,30 @@ aichat2md https://chatgpt.com/share/xxx
 aichat2md ~/Downloads/chat.webarchive
 ```
+## Supported Platforms
+- **ChatGPT** - chatgpt.com share links
+- **Gemini** - gemini.google.com or g.co share links
+- **Doubao (豆包)** - doubao.com share links
+- **Webarchive** - Safari exported .webarchive files (any platform)
+### Usage Examples
+```bash
+# ChatGPT
+aichat2md https://chatgpt.com/share/xxx
+# Gemini (supports both long and short URLs)
+aichat2md https://gemini.google.com/share/xxx
+aichat2md https://g.co/gemini/share/xxx
+# Doubao
+aichat2md https://www.doubao.com/thread/xxx
+# Webarchive file
+aichat2md ~/Downloads/conversation.webarchive
+```
 ## Supported AI Backends
 - **DeepSeek** (default) - Cost-effective, Chinese service

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/requires.txt RENAMED Viewed

@@ -1,2 +1,3 @@
 playwright>=1.40.0
 requests>=2.31.0
+yaspin>=3.0.0

{aichat2md-1.0.1 → aichat2md-1.2.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "aichat2md"
-version = "1.0.1"
+version = "1.2.0"
 description = "Convert AI chat conversations to structured Markdown"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -28,6 +28,7 @@ classifiers = [
 dependencies = [
     "playwright>=1.40.0",
     "requests>=2.31.0",
+    "yaspin>=3.0.0",
 ]
 [project.scripts]

aichat2md-1.0.1/aichat2md/extractors/playwright_extractor.py DELETED Viewed

@@ -1,58 +0,0 @@
-"""Extract content from ChatGPT share URLs using Playwright."""
-from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
-def extract_from_url(url: str, timeout: int = 30000) -> str:
-    """
-    Extract text content from ChatGPT share URL.
-    Args:
-        url: ChatGPT share URL (e.g., https://chatgpt.com/share/...)
-        timeout: Page load timeout in milliseconds
-    Returns:
-        Extracted plain text content
-    Raises:
-        PlaywrightTimeoutError: If page fails to load
-        ValueError: If URL is invalid
-    """
-    if not url.startswith('http'):
-        raise ValueError(f"Invalid URL: {url}")
-    try:
-        with sync_playwright() as p:
-            browser = p.chromium.launch(headless=True)
-            page = browser.new_page()
-            # Navigate and wait for network idle
-            page.goto(url, wait_until='networkidle', timeout=timeout)
-            # Wait for conversation content to load
-            # ChatGPT share pages typically have conversation in main content area
-            page.wait_for_selector('main', timeout=10000)
-            # Extract plain text from body
-            content = page.inner_text('body')
-            browser.close()
-            return content.strip()
-    except PlaywrightTimeoutError as e:
-        raise PlaywrightTimeoutError(
-            f"Failed to load page within {timeout}ms. "
-            "Check your network connection and URL validity."
-        ) from e
-if __name__ == "__main__":
-    # Manual test
-    import sys
-    if len(sys.argv) > 1:
-        url = sys.argv[1]
-        print(f"Extracting from: {url}")
-        content = extract_from_url(url)
-        print(f"Extracted {len(content)} characters")
-        print(content[:500])

{aichat2md-1.0.1 → aichat2md-1.2.0}/LICENSE RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/config.py RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/extractors/__init__.py RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/extractors/webarchive_extractor.py RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/prompts/__init__.py RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/prompts/system_prompt_en.txt RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md/prompts/system_prompt_zh.txt RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/entry_points.txt RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/aichat2md.egg-info/top_level.txt RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/setup.cfg RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/tests/test_cli.py RENAMED Viewed

File without changes

{aichat2md-1.0.1 → aichat2md-1.2.0}/tests/test_config.py RENAMED Viewed

File without changes

aichat2md 1.0.1__tar.gz → 1.2.0__tar.gz

aichat2md 1.0.1__tar.gz → 1.2.0__tar.gz