PyPI - aichat2md - Versions diffs - 1.0.0__py3-none-any.whl - Mend

aichat2md 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

aichat2md/__init__.py +5 -0
aichat2md/cli.py +254 -0
aichat2md/config.py +142 -0
aichat2md/extractors/__init__.py +6 -0
aichat2md/extractors/playwright_extractor.py +58 -0
aichat2md/extractors/webarchive_extractor.py +99 -0
aichat2md/prompts/__init__.py +1 -0
aichat2md/structurizer.py +142 -0
aichat2md-1.0.0.dist-info/METADATA +288 -0
aichat2md-1.0.0.dist-info/RECORD +14 -0
aichat2md-1.0.0.dist-info/WHEEL +5 -0
aichat2md-1.0.0.dist-info/entry_points.txt +2 -0
aichat2md-1.0.0.dist-info/licenses/LICENSE +21 -0
aichat2md-1.0.0.dist-info/top_level.txt +1 -0

aichat2md/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""aichat2md - Convert AI chat conversations to structured Markdown."""
+__version__ = "1.0.0"
+__author__ = "PlaceNameDay"
+__description__ = "Convert AI chat conversations to structured Markdown"

aichat2md/cli.py ADDED Viewed

@@ -0,0 +1,254 @@
+#!/usr/bin/env python3
+"""
+aichat2md - Convert AI chat conversations to structured Markdown.
+Usage:
+    aichat2md --setup                    # Initial configuration
+    aichat2md <url>                      # Extract from URL
+    aichat2md <file.webarchive>          # Extract from webarchive
+    aichat2md <url> --lang zh            # Override language
+    aichat2md <url> -o output.md         # Custom output path
+"""
+import argparse
+import sys
+from pathlib import Path
+from datetime import datetime
+from typing import Tuple
+from .config import setup_config, load_config
+from .extractors.playwright_extractor import extract_from_url
+from .extractors.webarchive_extractor import extract_from_webarchive
+from .structurizer import structurize_content
+from . import __version__
+def sanitize_filename(title: str, max_length: int = 50) -> str:
+    """
+    Sanitize title for use as filename.
+    Args:
+        title: Original title
+        max_length: Maximum length of filename
+    Returns:
+        Sanitized filename
+    """
+    # Remove or replace invalid filename characters
+    invalid_chars = '<>:"/\\|?*'
+    for char in invalid_chars:
+        title = title.replace(char, '-')
+    # Truncate to max length
+    if len(title) > max_length:
+        title = title[:max_length]
+    # Remove leading/trailing spaces and dots
+    title = title.strip('. ')
+    return title
+def generate_filename_from_markdown(markdown: str) -> str:
+    """
+    Extract title from markdown and generate filename.
+    Args:
+        markdown: Structured markdown content
+    Returns:
+        Filename in format: YYYY-MM-DD-title.md
+    """
+    # Extract first # heading as title
+    lines = markdown.split('\n')
+    title = "untitled"
+    for line in lines:
+        line = line.strip()
+        if line.startswith('# '):
+            title = line[2:].strip()
+            break
+    # Sanitize and format
+    title_clean = sanitize_filename(title)
+    today = datetime.now().strftime('%Y-%m-%d')
+    return f"{today}-{title_clean}.md"
+def extract_content(input_path: str) -> Tuple[str, str]:
+    """
+    Extract content from URL or webarchive file.
+    Args:
+        input_path: URL or file path
+    Returns:
+        Tuple of (extracted_text, source_identifier)
+    """
+    if input_path.startswith('http'):
+        print(f"📡 Extracting from URL: {input_path}")
+        text = extract_from_url(input_path)
+        source = input_path
+    else:
+        print(f"📄 Extracting from webarchive: {input_path}")
+        text = extract_from_webarchive(input_path)
+        source = Path(input_path).name
+    print(f"✓ Extracted {len(text)} characters")
+    return text, source
+def determine_output_path(input_path: str, markdown: str, config: dict, custom_output: str = None) -> Path:
+    """
+    Determine output path based on input type and custom override.
+    Args:
+        input_path: Original input (URL or file path)
+        markdown: Generated markdown (for title extraction)
+        config: Configuration dict
+        custom_output: Custom output path from CLI argument
+    Returns:
+        Output file path
+    """
+    if custom_output:
+        # Use custom output path
+        output_path = Path(custom_output).expanduser()
+        # Ensure .md extension
+        if not output_path.suffix:
+            output_path = output_path.with_suffix('.md')
+    elif input_path.startswith('http'):
+        # URL input: use config output_dir
+        output_dir = Path(config['output_dir']).expanduser()
+        output_dir.mkdir(parents=True, exist_ok=True)
+        filename = generate_filename_from_markdown(markdown)
+        output_path = output_dir / filename
+    else:
+        # Webarchive input: same directory as input file
+        input_file = Path(input_path)
+        output_path = input_file.with_suffix('.md')
+    # Handle filename conflicts
+    if output_path.exists():
+        base = output_path.stem
+        suffix = output_path.suffix
+        parent = output_path.parent
+        counter = 1
+        while output_path.exists():
+            output_path = parent / f"{base}-{counter}{suffix}"
+            counter += 1
+    return output_path
+def main():
+    """Main CLI entry point."""
+    parser = argparse.ArgumentParser(
+        prog="aichat2md",
+        description='Convert AI chat conversations to structured Markdown',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  aichat2md --setup
+  aichat2md https://chatgpt.com/share/xxx
+  aichat2md ~/Downloads/chat.webarchive
+  aichat2md <url> --lang zh
+  aichat2md <url> -o ~/Documents/output.md
+  aichat2md <url> --model gpt-4o
+        """
+    )
+    parser.add_argument(
+        'input',
+        nargs='?',
+        help='AI chat share URL or .webarchive file path'
+    )
+    parser.add_argument(
+        '--setup',
+        action='store_true',
+        help='Initialize configuration (API key, provider, language, etc.)'
+    )
+    parser.add_argument(
+        '--lang',
+        choices=['en', 'zh'],
+        help='Override prompt language (English or Chinese)'
+    )
+    parser.add_argument(
+        '--output', '-o',
+        help='Custom output file path'
+    )
+    parser.add_argument(
+        '--model',
+        help='Override AI model'
+    )
+    parser.add_argument(
+        '--version',
+        action='version',
+        version=f'%(prog)s {__version__}'
+    )
+    args = parser.parse_args()
+    # Handle setup mode
+    if args.setup:
+        setup_config()
+        return
+    # Validate input
+    if not args.input:
+        parser.print_help()
+        print("\n✗ Error: Please provide a URL or file path")
+        sys.exit(1)
+    try:
+        # Load configuration
+        config = load_config()
+        # Override config with CLI arguments
+        if args.lang:
+            config["language"] = args.lang
+        if args.model:
+            config["model"] = args.model
+        # Extract content
+        raw_text, source = extract_content(args.input)
+        # Structurize with AI
+        provider = config.get("api_base_url", "API")
+        print(f"🤖 Structurizing with {provider}...")
+        markdown = structurize_content(raw_text, config, source)
+        # Determine output path
+        output_path = determine_output_path(args.input, markdown, config, args.output)
+        # Ensure parent directory exists
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        # Save to file
+        output_path.write_text(markdown, encoding='utf-8')
+        print(f"✓ Saved to: {output_path}")
+    except FileNotFoundError as e:
+        print(f"✗ File error: {e}")
+        sys.exit(1)
+    except ValueError as e:
+        print(f"✗ Validation error: {e}")
+        sys.exit(1)
+    except Exception as e:
+        print(f"✗ Unexpected error: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

aichat2md/config.py ADDED Viewed

@@ -0,0 +1,142 @@
+"""Configuration management for aichat2md."""
+import json
+from pathlib import Path
+from typing import Dict, Any
+# Configuration file location: ~/.config/aichat2md/config.json, resolved
+# relative to the current user's home directory on every platform.
+CONFIG_DIR = Path.home() / ".config" / "aichat2md"
+CONFIG_FILE = CONFIG_DIR / "config.json"
+# Default configuration. setup_config() starts from a copy of this dict and
+# load_config() uses it to fill in any keys missing from the saved file.
+DEFAULT_CONFIG = {
+    "api_key": "",
+    "api_base_url": "https://api.deepseek.com",
+    "language": "en",
+    "output_dir": str(Path.home() / "Downloads"),
+    "model": "deepseek-chat",
+    "max_tokens": 4000,
+    "temperature": 0.7
+}
+# API preset configurations. setup_config() lists these in insertion order,
+# so the order here defines the menu numbering (and entry 1 is the default).
+API_PRESETS = {
+    "deepseek": {
+        "api_base_url": "https://api.deepseek.com",
+        "model": "deepseek-chat",
+        "description": "DeepSeek (cost-effective, Chinese service)"
+    },
+    "openai": {
+        "api_base_url": "https://api.openai.com/v1",
+        "model": "gpt-4o-mini",
+        "description": "OpenAI (GPT-4o-mini)"
+    },
+    "groq": {
+        "api_base_url": "https://api.groq.com/openai/v1",
+        "model": "llama-3.3-70b-versatile",
+        "description": "Groq (fast inference)"
+    },
+    # Left blank on purpose: setup_config() prompts for the URL and model
+    # when this entry is chosen.
+    "custom": {
+        "api_base_url": "",
+        "model": "",
+        "description": "Custom OpenAI-compatible API"
+    }
+}
+def get_default_output_dir() -> str:
+    """Get platform-specific default downloads directory."""
+    return str(Path.home() / "Downloads")
+def setup_config():
+    """Interactive config setup with API provider selection."""
+    print("=== aichat2md Configuration Setup ===\n")
+    # Step 1: Select API provider
+    print("Select API provider:")
+    for i, (key, preset) in enumerate(API_PRESETS.items(), 1):
+        print(f"{i}. {preset['description']}")
+    while True:
+        choice = input(f"\nChoice (1-{len(API_PRESETS)}) [1]: ").strip() or "1"
+        try:
+            choice_idx = int(choice) - 1
+            if 0 <= choice_idx < len(API_PRESETS):
+                break
+        except ValueError:
+            pass
+        print("Invalid choice, please try again")
+    provider_key = list(API_PRESETS.keys())[choice_idx]
+    preset = API_PRESETS[provider_key]
+    # Step 2: API configuration
+    api_key = input(f"\nEnter your {provider_key.upper()} API key: ").strip()
+    if provider_key == "custom":
+        api_base_url = input("Enter API base URL (e.g., http://localhost:8000): ").strip()
+        model = input("Enter model name: ").strip()
+    else:
+        api_base_url = preset["api_base_url"]
+        model = preset["model"]
+        print(f"Using: {api_base_url}")
+        print(f"Model: {model}")
+    # Step 3: Language selection
+    print("\nSelect language for AI prompts:")
+    print("1. English")
+    print("2. 中文 (Chinese)")
+    lang_choice = input("Choice (1-2) [1]: ").strip() or "1"
+    language = "zh" if lang_choice == "2" else "en"
+    # Step 4: Output directory
+    default_dir = get_default_output_dir()
+    output_dir = input(f"\nOutput directory (default: {default_dir}): ").strip()
+    if not output_dir:
+        output_dir = default_dir
+    # Create config
+    config = DEFAULT_CONFIG.copy()
+    config.update({
+        "api_key": api_key,
+        "api_base_url": api_base_url,
+        "model": model,
+        "language": language,
+        "output_dir": output_dir
+    })
+    # Save config
+    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+    CONFIG_FILE.write_text(json.dumps(config, indent=2), encoding='utf-8')
+    print(f"\n✓ Configuration saved to {CONFIG_FILE}")
+def load_config() -> Dict[str, Any]:
+    """Load configuration from file."""
+    if not CONFIG_FILE.exists():
+        raise FileNotFoundError(
+            f"Configuration file not found. Please run: aichat2md --setup"
+        )
+    try:
+        config = json.loads(CONFIG_FILE.read_text(encoding='utf-8'))
+    except json.JSONDecodeError as e:
+        raise ValueError(f"Invalid JSON in config file: {e}")
+    if not config.get("api_key"):
+        raise ValueError("API key not configured. Please run: aichat2md --setup")
+    # Merge with defaults for backward compatibility
+    full_config = DEFAULT_CONFIG.copy()
+    full_config.update(config)
+    return full_config
+def validate_config(config: Dict[str, Any]) -> bool:
+    """Validate configuration has required fields."""
+    required_fields = ["api_key", "api_base_url", "model", "output_dir"]
+    return all(field in config and config[field] for field in required_fields)

aichat2md/extractors/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Content extractors for different sources."""
+from .playwright_extractor import extract_from_url
+from .webarchive_extractor import extract_from_webarchive
+__all__ = ['extract_from_url', 'extract_from_webarchive']

aichat2md/extractors/playwright_extractor.py ADDED Viewed

@@ -0,0 +1,58 @@
+"""Extract content from ChatGPT share URLs using Playwright."""
+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
+def extract_from_url(url: str, timeout: int = 30000) -> str:
+    """
+    Extract text content from ChatGPT share URL.
+    Args:
+        url: ChatGPT share URL (e.g., https://chatgpt.com/share/...)
+        timeout: Page load timeout in milliseconds
+    Returns:
+        Extracted plain text content
+    Raises:
+        PlaywrightTimeoutError: If page fails to load
+        ValueError: If URL is invalid
+    """
+    if not url.startswith('http'):
+        raise ValueError(f"Invalid URL: {url}")
+    try:
+        with sync_playwright() as p:
+            browser = p.chromium.launch(headless=True)
+            page = browser.new_page()
+            # Navigate and wait for network idle
+            page.goto(url, wait_until='networkidle', timeout=timeout)
+            # Wait for conversation content to load
+            # ChatGPT share pages typically have conversation in main content area
+            page.wait_for_selector('main', timeout=10000)
+            # Extract plain text from body
+            content = page.inner_text('body')
+            browser.close()
+            return content.strip()
+    except PlaywrightTimeoutError as e:
+        raise PlaywrightTimeoutError(
+            f"Failed to load page within {timeout}ms. "
+            "Check your network connection and URL validity."
+        ) from e
+if __name__ == "__main__":
+    # Manual test
+    import sys
+    if len(sys.argv) > 1:
+        url = sys.argv[1]
+        print(f"Extracting from: {url}")
+        content = extract_from_url(url)
+        print(f"Extracted {len(content)} characters")
+        print(content[:500])

aichat2md/extractors/webarchive_extractor.py ADDED Viewed

@@ -0,0 +1,99 @@
+"""Extract content from Safari .webarchive files."""
+import plistlib
+from pathlib import Path
+from html.parser import HTMLParser
+from typing import List
+class CleanHTMLParser(HTMLParser):
+    """HTML parser that extracts clean text, skipping scripts and styles."""
+    def __init__(self):
+        super().__init__()
+        self.text_chunks: List[str] = []
+        self.skip_tags = {'script', 'style', 'noscript'}
+        self.current_tag = None
+    def handle_starttag(self, tag, attrs):
+        if tag in self.skip_tags:
+            self.current_tag = tag
+    def handle_endtag(self, tag):
+        if tag == self.current_tag:
+            self.current_tag = None
+    def handle_data(self, data):
+        if self.current_tag is None:
+            # Clean whitespace but preserve structure
+            cleaned = data.strip()
+            if cleaned:
+                self.text_chunks.append(cleaned)
+    def get_text(self) -> str:
+        """Get extracted text with normalized spacing."""
+        return '\n'.join(self.text_chunks)
+def extract_from_webarchive(filepath: str) -> str:
+    """
+    Extract text content from Safari .webarchive file.
+    Args:
+        filepath: Path to .webarchive file
+    Returns:
+        Extracted plain text content
+    Raises:
+        FileNotFoundError: If file doesn't exist
+        ValueError: If file is not a valid webarchive
+    """
+    path = Path(filepath)
+    if not path.exists():
+        raise FileNotFoundError(f"File not found: {filepath}")
+    if path.suffix.lower() != '.webarchive':
+        raise ValueError(f"Not a webarchive file: {filepath}")
+    try:
+        with open(filepath, 'rb') as f:
+            plist = plistlib.load(f)
+        # Extract main resource HTML data
+        if 'WebMainResource' not in plist:
+            raise ValueError("Invalid webarchive: missing WebMainResource")
+        main_resource = plist['WebMainResource']
+        if 'WebResourceData' not in main_resource:
+            raise ValueError("Invalid webarchive: missing WebResourceData")
+        html_data = main_resource['WebResourceData']
+        # Decode HTML (try UTF-8, fallback to latin-1)
+        try:
+            html = html_data.decode('utf-8')
+        except UnicodeDecodeError:
+            html = html_data.decode('latin-1', errors='ignore')
+        # Parse and clean HTML
+        parser = CleanHTMLParser()
+        parser.feed(html)
+        return parser.get_text()
+    except plistlib.InvalidFileException as e:
+        raise ValueError(f"Invalid webarchive format: {e}") from e
+if __name__ == "__main__":
+    # Manual test
+    import sys
+    if len(sys.argv) > 1:
+        filepath = sys.argv[1]
+        print(f"Extracting from: {filepath}")
+        content = extract_from_webarchive(filepath)
+        print(f"Extracted {len(content)} characters")
+        print(content[:500])

aichat2md/prompts/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Prompt templates for different languages."""

aichat2md/structurizer.py ADDED Viewed

@@ -0,0 +1,142 @@
+"""AI structurization using OpenAI-compatible APIs."""
+import requests
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, Any
+def load_system_prompt(language: str) -> str:
+    """
+    Load system prompt for the specified language.
+    Args:
+        language: Language code ('en' or 'zh')
+    Returns:
+        System prompt text
+    Raises:
+        FileNotFoundError: If prompt file doesn't exist
+    """
+    prompt_file = Path(__file__).parent / "prompts" / f"system_prompt_{language}.txt"
+    if not prompt_file.exists():
+        raise FileNotFoundError(f"Prompt file not found: {prompt_file}")
+    return prompt_file.read_text(encoding='utf-8')
+def structurize_content(
+    raw_text: str,
+    config: Dict[str, Any],
+    source: str = ""
+) -> str:
+    """
+    Structurize raw text into Markdown using OpenAI-compatible API.
+    Args:
+        raw_text: Raw extracted text from AI conversation
+        config: Configuration dict with API credentials
+        source: Original source URL or filename
+    Returns:
+        Structured Markdown content
+    Raises:
+        requests.exceptions.HTTPError: If API call fails
+        ValueError: If response is invalid
+    """
+    # Load system prompt based on language
+    language = config.get("language", "en")
+    system_prompt = load_system_prompt(language)
+    # Append source info to prompt if available
+    if source:
+        if language == "zh":
+            system_prompt += f"\n\n原始来源: {source}"
+        else:
+            system_prompt += f"\n\nOriginal source: {source}"
+    # Construct API URL (ensure /v1/chat/completions endpoint)
+    api_base = config["api_base_url"].rstrip('/')
+    if not api_base.endswith('/v1'):
+        api_url = f"{api_base}/v1/chat/completions"
+    else:
+        api_url = f"{api_base}/chat/completions"
+    headers = {
+        'Authorization': f'Bearer {config["api_key"]}',
+        'Content-Type': 'application/json'
+    }
+    payload = {
+        'model': config.get('model', 'deepseek-chat'),
+        'messages': [
+            {'role': 'system', 'content': system_prompt},
+            {'role': 'user', 'content': raw_text}
+        ],
+        'max_tokens': config.get('max_tokens', 4000),
+        'temperature': config.get('temperature', 0.7)
+    }
+    try:
+        response = requests.post(api_url, headers=headers, json=payload, timeout=60)
+        response.raise_for_status()
+        result = response.json()
+        if 'choices' not in result or len(result['choices']) == 0:
+            raise ValueError("Invalid API response: missing choices")
+        markdown = result['choices'][0]['message']['content']
+        # Ensure front matter has date and source if not already present
+        if not markdown.startswith('---'):
+            # Add front matter if missing
+            today = datetime.now().strftime('%Y-%m-%d')
+            if language == "zh":
+                front_matter = f"""---
+技术标签: []
+日期: {today}
+来源: {source or 'Unknown'}
+---
+"""
+            else:
+                front_matter = f"""---
+tags: []
+date: {today}
+source: {source or 'Unknown'}
+---
+"""
+            markdown = front_matter + markdown
+        return markdown
+    except requests.exceptions.HTTPError as e:
+        if e.response.status_code == 401:
+            raise requests.exceptions.HTTPError(
+                "API authentication failed. Check your API key"
+            ) from e
+        elif e.response.status_code == 429:
+            raise requests.exceptions.HTTPError(
+                "Rate limit exceeded. Please wait and try again"
+            ) from e
+        else:
+            error_msg = f"API request failed: {e.response.status_code}"
+            try:
+                error_detail = e.response.json()
+                error_msg += f" - {error_detail}"
+            except:
+                error_msg += f" - {e.response.text[:200]}"
+            raise requests.exceptions.HTTPError(error_msg) from e
+    except requests.exceptions.Timeout:
+        raise TimeoutError(
+            "API request timed out. The conversation might be too long"
+        )
+    except requests.exceptions.RequestException as e:
+        raise RuntimeError(f"Network error: {e}") from e

aichat2md-1.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,288 @@
+Metadata-Version: 2.4
+Name: aichat2md
+Version: 1.0.0
+Summary: Convert AI chat conversations to structured Markdown
+Author: PlaceNameDay
+License: MIT
+Project-URL: Homepage, https://github.com/yourusername/aichat2md
+Project-URL: Repository, https://github.com/yourusername/aichat2md
+Project-URL: Issues, https://github.com/yourusername/aichat2md/issues
+Keywords: chatgpt,claude,markdown,ai,converter,deepseek
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Software Development :: Documentation
+Classifier: Topic :: Text Processing :: Markup :: Markdown
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: playwright>=1.40.0
+Requires-Dist: requests>=2.31.0
+Dynamic: license-file
+# aichat2md
+Convert AI chat conversations to structured Markdown documents.
+## Features
+- 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
+- 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
+- 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
+- 🌍 **Bilingual support** - English/Chinese prompts
+- 📝 **Clean output** - Knowledge-focused Markdown, not chat logs
+- ⚡ **Simple CLI** - pip-installable, one-time setup
+## Quick Start
+```bash
+# Install
+pip install aichat2md
+# Configure (one-time setup)
+aichat2md --setup
+# Convert a ChatGPT share URL
+aichat2md https://chatgpt.com/share/xxx
+# Convert a webarchive file
+aichat2md ~/Downloads/chat.webarchive
+```
+## Supported AI Backends
+- **DeepSeek** (default) - Cost-effective, Chinese service
+- **OpenAI** - GPT-4o-mini, GPT-4
+- **Groq** - Fast inference with Llama models
+- **Custom** - Any OpenAI-compatible API
+## Installation
+### Prerequisites
+- Python 3.8 or higher
+- Playwright (the Python package is installed automatically as a dependency; its Chromium browser must be installed separately, see below)
+### Install from PyPI
+```bash
+pip install aichat2md
+```
+### Install Playwright browsers
+```bash
+playwright install chromium
+```
+### First-time Setup
+```bash
+aichat2md --setup
+```
+You'll be prompted to:
+1. Select your AI provider (DeepSeek, OpenAI, Groq, or custom)
+2. Enter your API key
+3. Choose prompt language (English or Chinese)
+4. Set output directory (default: ~/Downloads)
+## Usage
+### Basic Usage
+```bash
+# Convert from URL (uses configured output directory)
+aichat2md https://chatgpt.com/share/xxx
+# Convert from webarchive (outputs to same directory as input)
+aichat2md ~/Downloads/chat.webarchive
+```
+### Override Language
+```bash
+# Use Chinese prompts (even if English is configured)
+aichat2md <url> --lang zh
+# Use English prompts
+aichat2md <url> --lang en
+```
+### Custom Output Path
+```bash
+# Specify output file
+aichat2md <url> -o ~/Documents/my-notes.md
+aichat2md <url> --output ~/Documents/my-notes.md
+```
+### Override Model
+```bash
+# Use a different model than configured
+aichat2md <url> --model gpt-4o
+aichat2md <url> --model deepseek-chat
+```
+### Version Info
+```bash
+aichat2md --version
+```
+## Configuration
+Configuration is stored in `~/.config/aichat2md/config.json` (the same path, relative to your home directory, on every platform).
+### Example Config
+```json
+{
+  "api_key": "sk-your-api-key",
+  "api_base_url": "https://api.deepseek.com",
+  "model": "deepseek-chat",
+  "language": "en",
+  "output_dir": "/Users/you/Downloads",
+  "max_tokens": 4000,
+  "temperature": 0.7
+}
+```
+### Reconfigure
+```bash
+aichat2md --setup
+```
+## Output Format
+The tool converts chat conversations into structured Markdown with:
+- **Front matter** - Tags, date, source
+- **Summary** - 2-3 sentence overview
+- **Key topics** - Bullet point list
+- **Knowledge sections** - Reorganized content with logical headings
+- **Code examples** - Extracted code blocks with comments
+### Example Output
+```markdown
+---
+tags: [Python, API, Web]
+date: 2026-02-02
+source: https://chatgpt.com/share/xxx
+---
+# Building REST APIs with FastAPI
+## Summary
+This document covers building production-ready REST APIs using FastAPI...
+## Key Topics
+- API design patterns
+- Request validation
+- Error handling
+## API Design Principles
+...
+## Code Examples
+\```python
+from fastapi import FastAPI
+app = FastAPI()
+...
+\```
+```
+## How It Works
+1. **Extract** - Playwright (URLs) or plistlib (webarchive) extracts raw text
+2. **Structurize** - AI API reorganizes into knowledge document
+3. **Save** - Auto-generated filename or specified path
+### Why Two-Stage Processing?
+- **Stage 1 (Extract)** - No AI tokens used, just HTML parsing
+- **Stage 2 (Structurize)** - AI organizes content efficiently
+This saves costs and allows local caching of extracted content.
+## Development
+### Local Installation
+```bash
+# Clone repository
+git clone https://github.com/yourusername/aichat2md.git
+cd aichat2md
+# Install in editable mode
+pip install -e .
+# Install Playwright
+playwright install chromium
+```
+### Run Tests
+```bash
+pip install pytest
+pytest tests/
+```
+### Build Package
+```bash
+pip install build
+python -m build
+```
+## Troubleshooting
+### "Configuration file not found"
+Run `aichat2md --setup` to create configuration.
+### "API authentication failed"
+Check your API key in `~/.config/aichat2md/config.json`.
+### Playwright errors
+Install browsers: `playwright install chromium`
+### Empty output
+The conversation might be too short or the AI response failed. Check error messages.
+## Contributing
+Contributions welcome! Please:
+1. Fork the repository
+2. Create a feature branch
+3. Add tests for new features
+4. Submit a pull request
+## License
+MIT License - see [LICENSE](LICENSE) file.
+## Links
+- [GitHub Repository](https://github.com/yourusername/aichat2md)
+- [Issue Tracker](https://github.com/yourusername/aichat2md/issues)
+- [中文文档](README_zh.md)
+## Acknowledgments
+- [Playwright](https://playwright.dev/) - Web automation
+- [DeepSeek](https://www.deepseek.com/) - Cost-effective AI API
+- [OpenAI](https://openai.com/) - API compatibility standard

aichat2md-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+aichat2md/__init__.py,sha256=kNascuTiKyIixYwF9OeWUHo4ILlBRP9U9sGsFqFoczQ,196
+aichat2md/cli.py,sha256=bu_lnD85xLB-xKA04iMwj4WgKC0IkkJsHMnsJSA65H4,6905
+aichat2md/config.py,sha256=VO4fA_ByRKVRPa61W3VwIBjMPDsMt3iagFP2NkBSU7U,4351
+aichat2md/structurizer.py,sha256=0v1Hjo9KYcurBEaKJNt4MaqfVHzgEfHmH-KbIPO1Zcg,4213
+aichat2md/extractors/__init__.py,sha256=HzIWd2aZBACnWs2N2pPjIa7vjM-azPz-bqEviN0QgTs,217
+aichat2md/extractors/playwright_extractor.py,sha256=eB3VLogTnv6uYm3DAfT_8t6CmIsyt3SIBo0Slgd7Rc4,1752
+aichat2md/extractors/webarchive_extractor.py,sha256=eIZIVzLlBgO41Yzz8EKmjA8Diq3btlQO8S5mljDQWfs,2842
+aichat2md/prompts/__init__.py,sha256=cPdhDyL1QeVhl5gVFYb50zYMi24iGmxz6R_rrVy1-yk,48
+aichat2md-1.0.0.dist-info/licenses/LICENSE,sha256=g3TWU1mkL2Cn4XEm7hRrNHQySEheXc1VVy7cyQoXOyA,1069
+aichat2md-1.0.0.dist-info/METADATA,sha256=SG4osBMsJ5Qblh-iA29q7mBysh9JSc5n8HG_e7k3kEs,6290
+aichat2md-1.0.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+aichat2md-1.0.0.dist-info/entry_points.txt,sha256=N_gW2xKLteEm0vGAnhMcJQ6y8uRpOdlG4f477os5VLE,49
+aichat2md-1.0.0.dist-info/top_level.txt,sha256=o9-3lW1WoPj9xi0KCcPJLVRBmkO8lbuNqKq9tk0qnNA,10
+aichat2md-1.0.0.dist-info/RECORD,,

aichat2md-1.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.10.2)
+Root-Is-Purelib: true
+Tag: py3-none-any

aichat2md-1.0.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ aichat2md = aichat2md.cli:main

aichat2md-1.0.0.dist-info/licenses/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 PlaceNameDay
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

aichat2md-1.0.0.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ aichat2md