aichat2md 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aichat2md/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """aichat2md - Convert AI chat conversations to structured Markdown."""
2
+
3
+ __version__ = "1.0.0"
4
+ __author__ = "PlaceNameDay"
5
+ __description__ = "Convert AI chat conversations to structured Markdown"
aichat2md/cli.py ADDED
@@ -0,0 +1,254 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ aichat2md - Convert AI chat conversations to structured Markdown.
4
+
5
+ Usage:
6
+ aichat2md --setup # Initial configuration
7
+ aichat2md <url> # Extract from URL
8
+ aichat2md <file.webarchive> # Extract from webarchive
9
+ aichat2md <url> --lang zh # Override language
10
+ aichat2md <url> -o output.md # Custom output path
11
+ """
12
+
13
+ import argparse
14
+ import sys
15
+ from pathlib import Path
16
+ from datetime import datetime
17
+ from typing import Tuple
18
+
19
+ from .config import setup_config, load_config
20
+ from .extractors.playwright_extractor import extract_from_url
21
+ from .extractors.webarchive_extractor import extract_from_webarchive
22
+ from .structurizer import structurize_content
23
+ from . import __version__
24
+
25
+
26
def sanitize_filename(title: str, max_length: int = 50) -> str:
    """
    Sanitize title for use as filename.

    Characters that are not allowed in file names on common platforms are
    replaced with ``-``, the result is cut to ``max_length`` characters, and
    leading/trailing spaces and dots are removed.

    Args:
        title: Original title
        max_length: Maximum length of filename

    Returns:
        Sanitized filename, or "untitled" when nothing usable remains
    """
    # Replace every invalid character in a single pass.
    invalid_chars = '<>:"/\\|?*'
    cleaned = title.translate(str.maketrans(invalid_chars, '-' * len(invalid_chars)))

    # Truncate first, then strip, so the cut never leaves a trailing space or dot.
    if len(cleaned) > max_length:
        cleaned = cleaned[:max_length]
    cleaned = cleaned.strip('. ')

    # An empty stem would otherwise yield a file name such as "2026-01-01-.md".
    return cleaned or "untitled"
50
+
51
+
52
def generate_filename_from_markdown(markdown: str) -> str:
    """
    Build an output file name from the first level-1 heading of the markdown.

    Args:
        markdown: Structured markdown content

    Returns:
        Filename in format: YYYY-MM-DD-title.md ("untitled" is used as the
        title when the markdown has no line starting with "# ")
    """
    # Lazily walk the stripped lines and keep only the text of "# " headings.
    stripped_lines = (raw.strip() for raw in markdown.split('\n'))
    headings = (
        candidate[2:].strip()
        for candidate in stripped_lines
        if candidate.startswith('# ')
    )
    heading = next(headings, "untitled")

    safe_title = sanitize_filename(heading)
    date_prefix = datetime.now().strftime('%Y-%m-%d')
    return f"{date_prefix}-{safe_title}.md"
77
+
78
+
79
def extract_content(input_path: str) -> Tuple[str, str]:
    """
    Extract content from URL or webarchive file.

    Progress messages are printed to stdout.

    Args:
        input_path: URL (http:// or https://) or path to a .webarchive file

    Returns:
        Tuple of (extracted_text, source_identifier). The identifier is the
        URL itself for URL input and the bare file name for file input.
    """
    # Require a real scheme: a local file whose name merely starts with
    # "http" (e.g. "httpdump.webarchive") must not be treated as a URL.
    if input_path.startswith(('http://', 'https://')):
        print(f"📡 Extracting from URL: {input_path}")
        text = extract_from_url(input_path)
        source = input_path
    else:
        print(f"📄 Extracting from webarchive: {input_path}")
        text = extract_from_webarchive(input_path)
        source = Path(input_path).name

    print(f"✓ Extracted {len(text)} characters")
    return text, source
100
+
101
+
102
def determine_output_path(input_path: str, markdown: str, config: dict, custom_output: str = None) -> Path:
    """
    Determine output path based on input type and custom override.

    Args:
        input_path: Original input (URL or file path)
        markdown: Generated markdown (for title extraction)
        config: Configuration dict; 'output_dir' is read for URL input
        custom_output: Custom output path from CLI argument, or None

    Returns:
        Output file path. If the chosen path already exists, "-1", "-2", ...
        is appended to the stem until an unused name is found.
    """
    if custom_output:
        # Use custom output path
        output_path = Path(custom_output).expanduser()
        # Ensure .md extension
        if not output_path.suffix:
            output_path = output_path.with_suffix('.md')
    elif input_path.startswith(('http://', 'https://')):
        # URL input: use config output_dir. A full scheme is required so that
        # a local file named e.g. "httpdump.webarchive" is not misrouted here.
        output_dir = Path(config['output_dir']).expanduser()
        output_dir.mkdir(parents=True, exist_ok=True)
        filename = generate_filename_from_markdown(markdown)
        output_path = output_dir / filename
    else:
        # Webarchive input: same directory as input file
        output_path = Path(input_path).with_suffix('.md')

    # Handle filename conflicts: never overwrite an existing file
    if output_path.exists():
        base = output_path.stem
        suffix = output_path.suffix
        parent = output_path.parent
        counter = 1
        while output_path.exists():
            output_path = parent / f"{base}-{counter}{suffix}"
            counter += 1

    return output_path
143
+
144
+
145
def main():
    """
    Main CLI entry point.

    Parses the command line, then either runs the interactive setup or
    performs extract -> structurize -> save. Exits with status 1 when no
    input is given or when any step raises.
    """
    parser = argparse.ArgumentParser(
        prog="aichat2md",
        description='Convert AI chat conversations to structured Markdown',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  aichat2md --setup
  aichat2md https://chatgpt.com/share/xxx
  aichat2md ~/Downloads/chat.webarchive
  aichat2md <url> --lang zh
  aichat2md <url> -o ~/Documents/output.md
  aichat2md <url> --model gpt-4o
"""
    )

    parser.add_argument(
        'input',
        nargs='?',
        help='AI chat share URL or .webarchive file path'
    )

    parser.add_argument(
        '--setup',
        action='store_true',
        help='Initialize configuration (API key, provider, language, etc.)'
    )

    parser.add_argument(
        '--lang',
        choices=['en', 'zh'],
        help='Override prompt language (English or Chinese)'
    )

    parser.add_argument(
        '--output', '-o',
        help='Custom output file path'
    )

    parser.add_argument(
        '--model',
        help='Override AI model'
    )

    parser.add_argument(
        '--version',
        action='version',
        version=f'%(prog)s {__version__}'
    )

    args = parser.parse_args()

    # Handle setup mode
    if args.setup:
        setup_config()
        return

    # Validate input (it is optional in argparse only so --setup can run alone)
    if not args.input:
        parser.print_help()
        print("\n✗ Error: Please provide a URL or file path")
        sys.exit(1)

    try:
        # Load configuration
        config = load_config()

        # Override config with CLI arguments
        if args.lang:
            config["language"] = args.lang
        if args.model:
            config["model"] = args.model

        # Extract content
        raw_text, source = extract_content(args.input)

        # Structurize with AI
        provider = config.get("api_base_url", "API")
        print(f"🤖 Structurizing with {provider}...")
        markdown = structurize_content(raw_text, config, source)

        # Determine output path
        output_path = determine_output_path(args.input, markdown, config, args.output)

        # Ensure parent directory exists
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Save to file
        output_path.write_text(markdown, encoding='utf-8')

        print(f"✓ Saved to: {output_path}")

    except FileNotFoundError as e:
        print(f"✗ File error: {e}")
        sys.exit(1)

    except ValueError as e:
        print(f"✗ Validation error: {e}")
        sys.exit(1)

    except Exception as e:
        # Top-level boundary: report anything unexpected with a traceback.
        print(f"✗ Unexpected error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
aichat2md/config.py ADDED
@@ -0,0 +1,142 @@
1
+ """Configuration management for aichat2md."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Dict, Any
6
+
7
+
8
# Location of the saved settings: ~/.config/aichat2md/config.json
CONFIG_DIR = Path.home().joinpath(".config", "aichat2md")
CONFIG_FILE = CONFIG_DIR.joinpath("config.json")

# Values used for every key a saved configuration does not provide
DEFAULT_CONFIG = dict(
    api_key="",
    api_base_url="https://api.deepseek.com",
    language="en",
    output_dir=str(Path.home().joinpath("Downloads")),
    model="deepseek-chat",
    max_tokens=4000,
    temperature=0.7,
)

# Provider presets offered by the interactive setup, in menu order.
# Each row is (key, api_base_url, model, description).
API_PRESETS = {
    preset_key: {
        "api_base_url": preset_url,
        "model": preset_model,
        "description": preset_description,
    }
    for preset_key, preset_url, preset_model, preset_description in (
        ("deepseek", "https://api.deepseek.com", "deepseek-chat",
         "DeepSeek (cost-effective, Chinese service)"),
        ("openai", "https://api.openai.com/v1", "gpt-4o-mini",
         "OpenAI (GPT-4o-mini)"),
        ("groq", "https://api.groq.com/openai/v1", "llama-3.3-70b-versatile",
         "Groq (fast inference)"),
        ("custom", "", "",
         "Custom OpenAI-compatible API"),
    )
}
46
+
47
+
48
def get_default_output_dir() -> str:
    """Return the "Downloads" folder under the user's home directory as a string."""
    downloads = Path.home().joinpath("Downloads")
    return str(downloads)
51
+
52
+
53
def setup_config():
    """
    Interactive config setup with API provider selection.

    Prompts on stdin for provider, API key, prompt language and output
    directory, then writes the merged settings to CONFIG_FILE as JSON
    (creating CONFIG_DIR if needed).
    """
    print("=== aichat2md Configuration Setup ===\n")

    # Step 1: Select API provider
    print("Select API provider:")
    for i, preset in enumerate(API_PRESETS.values(), 1):
        print(f"{i}. {preset['description']}")

    # Re-prompt until a number inside the menu range is entered
    # (an empty answer selects option 1).
    while True:
        choice = input(f"\nChoice (1-{len(API_PRESETS)}) [1]: ").strip() or "1"
        try:
            choice_idx = int(choice) - 1
            if 0 <= choice_idx < len(API_PRESETS):
                break
        except ValueError:
            pass
        print("Invalid choice, please try again")

    provider_key = list(API_PRESETS.keys())[choice_idx]
    preset = API_PRESETS[provider_key]

    # Step 2: API configuration
    api_key = input(f"\nEnter your {provider_key.upper()} API key: ").strip()

    if provider_key == "custom":
        api_base_url = input("Enter API base URL (e.g., http://localhost:8000): ").strip()
        model = input("Enter model name: ").strip()
    else:
        api_base_url = preset["api_base_url"]
        model = preset["model"]
        print(f"Using: {api_base_url}")
        print(f"Model: {model}")

    # Step 3: Language selection (anything other than "2" means English)
    print("\nSelect language for AI prompts:")
    print("1. English")
    print("2. 中文 (Chinese)")
    lang_choice = input("Choice (1-2) [1]: ").strip() or "1"
    language = "zh" if lang_choice == "2" else "en"

    # Step 4: Output directory
    default_dir = get_default_output_dir()
    output_dir = input(f"\nOutput directory (default: {default_dir}): ").strip()
    if not output_dir:
        output_dir = default_dir

    # Create config
    config = DEFAULT_CONFIG.copy()
    config.update({
        "api_key": api_key,
        "api_base_url": api_base_url,
        "model": model,
        "language": language,
        "output_dir": output_dir
    })

    # Save config
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    CONFIG_FILE.write_text(json.dumps(config, indent=2), encoding='utf-8')

    # The file holds an API key in plain text, so restrict it to the owner.
    # Best effort only: some platforms/filesystems do not support this.
    try:
        CONFIG_FILE.chmod(0o600)
    except OSError:
        pass

    print(f"\n✓ Configuration saved to {CONFIG_FILE}")
115
+
116
+
117
def load_config() -> Dict[str, Any]:
    """
    Load configuration from file.

    Returns:
        DEFAULT_CONFIG overlaid with the values stored in CONFIG_FILE, so
        keys missing from an older config file still get a default.

    Raises:
        FileNotFoundError: If CONFIG_FILE does not exist
        ValueError: If the file is not valid JSON, is not a JSON object,
            or has no API key
    """
    if not CONFIG_FILE.exists():
        raise FileNotFoundError(
            "Configuration file not found. Please run: aichat2md --setup"
        )

    try:
        config = json.loads(CONFIG_FILE.read_text(encoding='utf-8'))
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON in config file: {e}") from e

    # Valid JSON that is not an object (e.g. a list) has no .get()/keys.
    if not isinstance(config, dict):
        raise ValueError("Invalid config file: expected a JSON object")

    if not config.get("api_key"):
        raise ValueError("API key not configured. Please run: aichat2md --setup")

    # Merge with defaults for backward compatibility
    full_config = DEFAULT_CONFIG.copy()
    full_config.update(config)

    return full_config
137
+
138
+
139
def validate_config(config: Dict[str, Any]) -> bool:
    """Return True when every required key is present with a truthy value."""
    for name in ("api_key", "api_base_url", "model", "output_dir"):
        if name not in config or not config[name]:
            return False
    return True
@@ -0,0 +1,6 @@
1
+ """Content extractors for different sources."""
2
+
3
+ from .playwright_extractor import extract_from_url
4
+ from .webarchive_extractor import extract_from_webarchive
5
+
6
+ __all__ = ['extract_from_url', 'extract_from_webarchive']
@@ -0,0 +1,58 @@
1
+ """Extract content from ChatGPT share URLs using Playwright."""
2
+
3
+ from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
4
+
5
+
6
def extract_from_url(url: str, timeout: int = 30000) -> str:
    """
    Extract text content from ChatGPT share URL.

    Loads the page in headless Chromium, waits for network idle and for a
    <main> element, and returns the visible text of <body>.

    Args:
        url: ChatGPT share URL (e.g., https://chatgpt.com/share/...)
        timeout: Page load timeout in milliseconds

    Returns:
        Extracted plain text content, stripped of surrounding whitespace

    Raises:
        PlaywrightTimeoutError: If page fails to load
        ValueError: If URL is invalid
    """
    # Require a full scheme; a bare "http" prefix also matches non-URLs.
    if not url.startswith(('http://', 'https://')):
        raise ValueError(f"Invalid URL: {url}")

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()

                # Navigate and wait for network idle
                page.goto(url, wait_until='networkidle', timeout=timeout)

                # Wait for conversation content to load
                # ChatGPT share pages typically have conversation in main content area
                page.wait_for_selector('main', timeout=10000)

                # Extract plain text from body
                content = page.inner_text('body')
            finally:
                # Close the browser even when navigation or extraction fails.
                browser.close()

            return content.strip()

    except PlaywrightTimeoutError as e:
        raise PlaywrightTimeoutError(
            f"Failed to load page within {timeout}ms. "
            "Check your network connection and URL validity."
        ) from e


if __name__ == "__main__":
    # Manual test
    import sys
    if len(sys.argv) > 1:
        url = sys.argv[1]
        print(f"Extracting from: {url}")
        content = extract_from_url(url)
        print(f"Extracted {len(content)} characters")
        print(content[:500])
@@ -0,0 +1,99 @@
1
+ """Extract content from Safari .webarchive files."""
2
+
3
+ import plistlib
4
+ from pathlib import Path
5
+ from html.parser import HTMLParser
6
+ from typing import List
7
+
8
+
9
class CleanHTMLParser(HTMLParser):
    """HTML parser that extracts clean text, skipping scripts and styles.

    Text inside <script>, <style> and <noscript> elements is dropped,
    including when those elements are nested inside one another.
    """

    def __init__(self):
        super().__init__()
        # Non-empty text fragments in document order
        self.text_chunks: List[str] = []
        self.skip_tags = {'script', 'style', 'noscript'}
        # Innermost open skip tag, or None when outside all of them
        self.current_tag = None
        # Every currently open skip tag, outermost first. A single slot is not
        # enough: closing an inner <style> must not end an outer <noscript>.
        self._skip_stack: List[str] = []

    def handle_starttag(self, tag, attrs):
        if tag in self.skip_tags:
            self._skip_stack.append(tag)
            self.current_tag = tag

    def handle_endtag(self, tag):
        if tag in self._skip_stack:
            # Unwind to the matching opener, discarding unclosed inner tags.
            while self._skip_stack.pop() != tag:
                pass
            self.current_tag = self._skip_stack[-1] if self._skip_stack else None

    def handle_data(self, data):
        if self._skip_stack:
            return
        # Drop whitespace-only fragments; keep one chunk per text node
        cleaned = data.strip()
        if cleaned:
            self.text_chunks.append(cleaned)

    def get_text(self) -> str:
        """Return the collected text fragments joined by newlines."""
        return '\n'.join(self.text_chunks)
36
+
37
+
38
def extract_from_webarchive(filepath: str) -> str:
    """
    Extract text content from Safari .webarchive file.

    Args:
        filepath: Path to .webarchive file

    Returns:
        Extracted plain text content

    Raises:
        FileNotFoundError: If file doesn't exist
        ValueError: If file is not a valid webarchive
    """
    path = Path(filepath)

    if not path.exists():
        raise FileNotFoundError(f"File not found: {filepath}")

    if path.suffix.lower() != '.webarchive':
        raise ValueError(f"Not a webarchive file: {filepath}")

    # Only the plist parse can raise InvalidFileException, so keep the try small.
    try:
        with path.open('rb') as f:
            plist = plistlib.load(f)
    except plistlib.InvalidFileException as e:
        raise ValueError(f"Invalid webarchive format: {e}") from e

    # Extract main resource HTML data
    if 'WebMainResource' not in plist:
        raise ValueError("Invalid webarchive: missing WebMainResource")

    main_resource = plist['WebMainResource']

    if 'WebResourceData' not in main_resource:
        raise ValueError("Invalid webarchive: missing WebResourceData")

    html_data = main_resource['WebResourceData']

    # Decode HTML: strict UTF-8 first, then the encoding the archive declares
    # for the resource (if any), and only then a lossy latin-1 fallback.
    encodings = ['utf-8']
    declared = main_resource.get('WebResourceTextEncodingName')
    if isinstance(declared, str) and declared:
        encodings.append(declared)

    html = None
    for encoding in encodings:
        try:
            html = html_data.decode(encoding)
            break
        except (UnicodeDecodeError, LookupError):
            # Wrong or unknown encoding name: try the next candidate.
            continue
    if html is None:
        html = html_data.decode('latin-1', errors='ignore')

    # Parse and clean HTML
    parser = CleanHTMLParser()
    parser.feed(html)

    return parser.get_text()


if __name__ == "__main__":
    # Manual test
    import sys
    if len(sys.argv) > 1:
        filepath = sys.argv[1]
        print(f"Extracting from: {filepath}")
        content = extract_from_webarchive(filepath)
        print(f"Extracted {len(content)} characters")
        print(content[:500])
@@ -0,0 +1 @@
1
+ """Prompt templates for different languages."""
@@ -0,0 +1,142 @@
1
+ """AI structurization using OpenAI-compatible APIs."""
2
+
3
+ import requests
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Dict, Any
7
+
8
+
9
def load_system_prompt(language: str) -> str:
    """
    Read the system prompt text for a language.

    The prompt is looked up as prompts/system_prompt_<language>.txt next to
    this module.

    Args:
        language: Language code ('en' or 'zh')

    Returns:
        System prompt text

    Raises:
        FileNotFoundError: If prompt file doesn't exist
    """
    prompts_dir = Path(__file__).parent / "prompts"
    prompt_file = prompts_dir / f"system_prompt_{language}.txt"

    if prompt_file.exists():
        return prompt_file.read_text(encoding='utf-8')

    raise FileNotFoundError(f"Prompt file not found: {prompt_file}")
28
+
29
+
30
def structurize_content(
    raw_text: str,
    config: Dict[str, Any],
    source: str = ""
) -> str:
    """
    Structurize raw text into Markdown using OpenAI-compatible API.

    Args:
        raw_text: Raw extracted text from AI conversation
        config: Configuration dict; reads 'api_base_url' and 'api_key'
            (required) and 'language', 'model', 'max_tokens', 'temperature'
        source: Original source URL or filename

    Returns:
        Structured Markdown content. A front matter block (tags, date,
        source) is prepended when the response does not start with '---'.

    Raises:
        requests.exceptions.HTTPError: If API call fails
        ValueError: If response is invalid
        TimeoutError: If the request times out
        RuntimeError: On any other request-level failure
        FileNotFoundError: If the system prompt file is missing
    """
    # Load system prompt based on language
    language = config.get("language", "en")
    system_prompt = load_system_prompt(language)

    # Append source info to prompt if available
    if source:
        if language == "zh":
            system_prompt += f"\n\n原始来源: {source}"
        else:
            system_prompt += f"\n\nOriginal source: {source}"

    # Construct API URL (ensure /v1/chat/completions endpoint)
    api_base = config["api_base_url"].rstrip('/')
    if not api_base.endswith('/v1'):
        api_url = f"{api_base}/v1/chat/completions"
    else:
        api_url = f"{api_base}/chat/completions"

    headers = {
        'Authorization': f'Bearer {config["api_key"]}',
        'Content-Type': 'application/json'
    }

    payload = {
        'model': config.get('model', 'deepseek-chat'),
        'messages': [
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': raw_text}
        ],
        'max_tokens': config.get('max_tokens', 4000),
        'temperature': config.get('temperature', 0.7)
    }

    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()

    except requests.exceptions.HTTPError as e:
        status = e.response.status_code if e.response is not None else None
        if status == 401:
            raise requests.exceptions.HTTPError(
                "API authentication failed. Check your API key"
            ) from e
        if status == 429:
            raise requests.exceptions.HTTPError(
                "Rate limit exceeded. Please wait and try again"
            ) from e

        error_msg = f"API request failed: {status}"
        if e.response is not None:
            try:
                error_detail = e.response.json()
                error_msg += f" - {error_detail}"
            except ValueError:
                # Error body is not JSON: include the start of the raw text.
                error_msg += f" - {e.response.text[:200]}"
        raise requests.exceptions.HTTPError(error_msg) from e

    except requests.exceptions.Timeout as e:
        raise TimeoutError(
            "API request timed out. The conversation might be too long"
        ) from e

    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"Network error: {e}") from e

    if not isinstance(result, dict) or not result.get('choices'):
        raise ValueError("Invalid API response: missing choices")

    # A malformed first choice must surface as the documented ValueError,
    # not as a stray KeyError/TypeError.
    try:
        markdown = result['choices'][0]['message']['content']
    except (KeyError, IndexError, TypeError) as e:
        raise ValueError("Invalid API response: missing message content") from e
    if not isinstance(markdown, str):
        raise ValueError("Invalid API response: missing message content")

    # Ensure front matter has date and source if not already present
    if not markdown.startswith('---'):
        today = datetime.now().strftime('%Y-%m-%d')
        origin = source or 'Unknown'
        if language == "zh":
            fields = ["技术标签: []", f"日期: {today}", f"来源: {origin}"]
        else:
            fields = ["tags: []", f"date: {today}", f"source: {origin}"]
        front_matter = "\n".join(["---", *fields, "---", "", ""])
        markdown = front_matter + markdown

    return markdown
@@ -0,0 +1,288 @@
1
+ Metadata-Version: 2.4
2
+ Name: aichat2md
3
+ Version: 1.0.0
4
+ Summary: Convert AI chat conversations to structured Markdown
5
+ Author: PlaceNameDay
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/yourusername/aichat2md
8
+ Project-URL: Repository, https://github.com/yourusername/aichat2md
9
+ Project-URL: Issues, https://github.com/yourusername/aichat2md/issues
10
+ Keywords: chatgpt,claude,markdown,ai,converter,deepseek
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Software Development :: Documentation
21
+ Classifier: Topic :: Text Processing :: Markup :: Markdown
22
+ Requires-Python: >=3.8
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: playwright>=1.40.0
26
+ Requires-Dist: requests>=2.31.0
27
+ Dynamic: license-file
28
+
29
+ # aichat2md
30
+
31
+ Convert AI chat conversations to structured Markdown documents.
32
+
33
+ ## Features
34
+
35
+ - 🌐 **Extract from URLs** - ChatGPT share links (with JS rendering via Playwright)
36
+ - 📄 **Extract from webarchive** - Safari .webarchive files (offline mode)
37
+ - 🤖 **Multiple AI backends** - DeepSeek, OpenAI, Groq, or any OpenAI-compatible API
38
+ - 🌍 **Bilingual support** - English/Chinese prompts
39
+ - 📝 **Clean output** - Knowledge-focused Markdown, not chat logs
40
+ - ⚡ **Simple CLI** - pip-installable, one-time setup
41
+
42
+ ## Quick Start
43
+
44
+ ```bash
45
+ # Install
46
+ pip install aichat2md
47
+
48
+ # Configure (one-time setup)
49
+ aichat2md --setup
50
+
51
+ # Convert a ChatGPT share URL
52
+ aichat2md https://chatgpt.com/share/xxx
53
+
54
+ # Convert a webarchive file
55
+ aichat2md ~/Downloads/chat.webarchive
56
+ ```
57
+
58
+ ## Supported AI Backends
59
+
60
+ - **DeepSeek** (default) - Cost-effective, Chinese service
61
+ - **OpenAI** - GPT-4o-mini, GPT-4
62
+ - **Groq** - Fast inference with Llama models
63
+ - **Custom** - Any OpenAI-compatible API
64
+
65
+ ## Installation
66
+
67
+ ### Prerequisites
68
+
69
+ - Python 3.8 or higher
70
+ - Playwright (automatically installed, but requires browser setup)
71
+
72
+ ### Install from PyPI
73
+
74
+ ```bash
75
+ pip install aichat2md
76
+ ```
77
+
78
+ ### Install Playwright browsers
79
+
80
+ ```bash
81
+ playwright install chromium
82
+ ```
83
+
84
+ ### First-time Setup
85
+
86
+ ```bash
87
+ aichat2md --setup
88
+ ```
89
+
90
+ You'll be prompted to:
91
+ 1. Select your AI provider (DeepSeek, OpenAI, Groq, or custom)
92
+ 2. Enter your API key
93
+ 3. Choose prompt language (English or Chinese)
94
+ 4. Set output directory (default: ~/Downloads)
95
+
96
+ ## Usage
97
+
98
+ ### Basic Usage
99
+
100
+ ```bash
101
+ # Convert from URL (uses configured output directory)
102
+ aichat2md https://chatgpt.com/share/xxx
103
+
104
+ # Convert from webarchive (outputs to same directory as input)
105
+ aichat2md ~/Downloads/chat.webarchive
106
+ ```
107
+
108
+ ### Override Language
109
+
110
+ ```bash
111
+ # Use Chinese prompts (even if English is configured)
112
+ aichat2md <url> --lang zh
113
+
114
+ # Use English prompts
115
+ aichat2md <url> --lang en
116
+ ```
117
+
118
+ ### Custom Output Path
119
+
120
+ ```bash
121
+ # Specify output file
122
+ aichat2md <url> -o ~/Documents/my-notes.md
123
+ aichat2md <url> --output ~/Documents/my-notes.md
124
+ ```
125
+
126
+ ### Override Model
127
+
128
+ ```bash
129
+ # Use a different model than configured
130
+ aichat2md <url> --model gpt-4o
131
+ aichat2md <url> --model deepseek-chat
132
+ ```
133
+
134
+ ### Version Info
135
+
136
+ ```bash
137
+ aichat2md --version
138
+ ```
139
+
140
+ ## Configuration
141
+
142
+ Configuration is stored in `~/.config/aichat2md/config.json` (cross-platform).
143
+
144
+ ### Example Config
145
+
146
+ ```json
147
+ {
148
+ "api_key": "sk-your-api-key",
149
+ "api_base_url": "https://api.deepseek.com",
150
+ "model": "deepseek-chat",
151
+ "language": "en",
152
+ "output_dir": "/Users/you/Downloads",
153
+ "max_tokens": 4000,
154
+ "temperature": 0.7
155
+ }
156
+ ```
157
+
158
+ ### Reconfigure
159
+
160
+ ```bash
161
+ aichat2md --setup
162
+ ```
163
+
164
+ ## Output Format
165
+
166
+ The tool converts chat conversations into structured Markdown with:
167
+
168
+ - **Front matter** - Tags, date, source
169
+ - **Summary** - 2-3 sentence overview
170
+ - **Key topics** - Bullet point list
171
+ - **Knowledge sections** - Reorganized content with logical headings
172
+ - **Code examples** - Extracted code blocks with comments
173
+
174
+ ### Example Output
175
+
176
+ ```markdown
177
+ ---
178
+ tags: [Python, API, Web]
179
+ date: 2026-02-02
180
+ source: https://chatgpt.com/share/xxx
181
+ ---
182
+
183
+ # Building REST APIs with FastAPI
184
+
185
+ ## Summary
186
+ This document covers building production-ready REST APIs using FastAPI...
187
+
188
+ ## Key Topics
189
+ - API design patterns
190
+ - Request validation
191
+ - Error handling
192
+
193
+ ## API Design Principles
194
+ ...
195
+
196
+ ## Code Examples
197
+ \```python
198
+ from fastapi import FastAPI
199
+ app = FastAPI()
200
+ ...
201
+ \```
202
+ ```
203
+
204
+ ## How It Works
205
+
206
+ 1. **Extract** - Playwright (URLs) or plistlib (webarchive) extracts raw text
207
+ 2. **Structurize** - AI API reorganizes into knowledge document
208
+ 3. **Save** - Auto-generated filename or specified path
209
+
210
+ ### Why Two-Stage Processing?
211
+
212
+ - **Stage 1 (Extract)** - No AI tokens used, just HTML parsing
213
+ - **Stage 2 (Structurize)** - AI organizes content efficiently
214
+
215
+ This saves costs and allows local caching of extracted content.
216
+
217
+ ## Development
218
+
219
+ ### Local Installation
220
+
221
+ ```bash
222
+ # Clone repository
223
+ git clone https://github.com/yourusername/aichat2md.git
224
+ cd aichat2md
225
+
226
+ # Install in editable mode
227
+ pip install -e .
228
+
229
+ # Install Playwright
230
+ playwright install chromium
231
+ ```
232
+
233
+ ### Run Tests
234
+
235
+ ```bash
236
+ pip install pytest
237
+ pytest tests/
238
+ ```
239
+
240
+ ### Build Package
241
+
242
+ ```bash
243
+ pip install build
244
+ python -m build
245
+ ```
246
+
247
+ ## Troubleshooting
248
+
249
+ ### "Configuration file not found"
250
+
251
+ Run `aichat2md --setup` to create configuration.
252
+
253
+ ### "API authentication failed"
254
+
255
+ Check your API key in `~/.config/aichat2md/config.json`.
256
+
257
+ ### Playwright errors
258
+
259
+ Install browsers: `playwright install chromium`
260
+
261
+ ### Empty output
262
+
263
+ The conversation might be too short or the AI response failed. Check error messages.
264
+
265
+ ## Contributing
266
+
267
+ Contributions welcome! Please:
268
+
269
+ 1. Fork the repository
270
+ 2. Create a feature branch
271
+ 3. Add tests for new features
272
+ 4. Submit a pull request
273
+
274
+ ## License
275
+
276
+ MIT License - see [LICENSE](LICENSE) file.
277
+
278
+ ## Links
279
+
280
+ - [GitHub Repository](https://github.com/yourusername/aichat2md)
281
+ - [Issue Tracker](https://github.com/yourusername/aichat2md/issues)
282
+ - [中文文档](README_zh.md)
283
+
284
+ ## Acknowledgments
285
+
286
+ - [Playwright](https://playwright.dev/) - Web automation
287
+ - [DeepSeek](https://www.deepseek.com/) - Cost-effective AI API
288
+ - [OpenAI](https://openai.com/) - API compatibility standard
@@ -0,0 +1,14 @@
1
+ aichat2md/__init__.py,sha256=kNascuTiKyIixYwF9OeWUHo4ILlBRP9U9sGsFqFoczQ,196
2
+ aichat2md/cli.py,sha256=bu_lnD85xLB-xKA04iMwj4WgKC0IkkJsHMnsJSA65H4,6905
3
+ aichat2md/config.py,sha256=VO4fA_ByRKVRPa61W3VwIBjMPDsMt3iagFP2NkBSU7U,4351
4
+ aichat2md/structurizer.py,sha256=0v1Hjo9KYcurBEaKJNt4MaqfVHzgEfHmH-KbIPO1Zcg,4213
5
+ aichat2md/extractors/__init__.py,sha256=HzIWd2aZBACnWs2N2pPjIa7vjM-azPz-bqEviN0QgTs,217
6
+ aichat2md/extractors/playwright_extractor.py,sha256=eB3VLogTnv6uYm3DAfT_8t6CmIsyt3SIBo0Slgd7Rc4,1752
7
+ aichat2md/extractors/webarchive_extractor.py,sha256=eIZIVzLlBgO41Yzz8EKmjA8Diq3btlQO8S5mljDQWfs,2842
8
+ aichat2md/prompts/__init__.py,sha256=cPdhDyL1QeVhl5gVFYb50zYMi24iGmxz6R_rrVy1-yk,48
9
+ aichat2md-1.0.0.dist-info/licenses/LICENSE,sha256=g3TWU1mkL2Cn4XEm7hRrNHQySEheXc1VVy7cyQoXOyA,1069
10
+ aichat2md-1.0.0.dist-info/METADATA,sha256=SG4osBMsJ5Qblh-iA29q7mBysh9JSc5n8HG_e7k3kEs,6290
11
+ aichat2md-1.0.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
12
+ aichat2md-1.0.0.dist-info/entry_points.txt,sha256=N_gW2xKLteEm0vGAnhMcJQ6y8uRpOdlG4f477os5VLE,49
13
+ aichat2md-1.0.0.dist-info/top_level.txt,sha256=o9-3lW1WoPj9xi0KCcPJLVRBmkO8lbuNqKq9tk0qnNA,10
14
+ aichat2md-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ aichat2md = aichat2md.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 PlaceNameDay
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ aichat2md