aichat2md 1.1.0__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aichat2md-1.1.0 → aichat2md-1.2.0}/PKG-INFO +2 -1
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md/__init__.py +1 -1
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md/cli.py +23 -5
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md/extractors/playwright_extractor.py +2 -2
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md/structurizer.py +3 -1
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md.egg-info/PKG-INFO +2 -1
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md.egg-info/requires.txt +1 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/pyproject.toml +2 -1
- {aichat2md-1.1.0 → aichat2md-1.2.0}/LICENSE +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/README.md +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md/config.py +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md/extractors/__init__.py +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md/extractors/webarchive_extractor.py +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md/prompts/__init__.py +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md/prompts/system_prompt_en.txt +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md/prompts/system_prompt_zh.txt +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md.egg-info/SOURCES.txt +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md.egg-info/dependency_links.txt +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md.egg-info/entry_points.txt +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/aichat2md.egg-info/top_level.txt +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/setup.cfg +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/tests/test_cli.py +0 -0
- {aichat2md-1.1.0 → aichat2md-1.2.0}/tests/test_config.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aichat2md
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Convert AI chat conversations to structured Markdown
|
|
5
5
|
Author: PlaceNameDay
|
|
6
6
|
License: MIT
|
|
@@ -24,6 +24,7 @@ Description-Content-Type: text/markdown
|
|
|
24
24
|
License-File: LICENSE
|
|
25
25
|
Requires-Dist: playwright>=1.40.0
|
|
26
26
|
Requires-Dist: requests>=2.31.0
|
|
27
|
+
Requires-Dist: yaspin>=3.0.0
|
|
27
28
|
Dynamic: license-file
|
|
28
29
|
|
|
29
30
|
# aichat2md
|
|
@@ -15,6 +15,9 @@ import sys
|
|
|
15
15
|
from pathlib import Path
|
|
16
16
|
from datetime import datetime
|
|
17
17
|
from typing import Tuple
|
|
18
|
+
import time
|
|
19
|
+
|
|
20
|
+
from yaspin import yaspin
|
|
18
21
|
|
|
19
22
|
from .config import setup_config, load_config
|
|
20
23
|
from .extractors.playwright_extractor import extract_from_url
|
|
@@ -23,6 +26,17 @@ from .structurizer import structurize_content
|
|
|
23
26
|
from . import __version__
|
|
24
27
|
|
|
25
28
|
|
|
29
|
+
class TimedText:
|
|
30
|
+
"""Dynamic text with elapsed time in seconds."""
|
|
31
|
+
def __init__(self, text: str):
|
|
32
|
+
self.text = text
|
|
33
|
+
self._start = time.time()
|
|
34
|
+
|
|
35
|
+
def __str__(self):
|
|
36
|
+
elapsed = int(time.time() - self._start)
|
|
37
|
+
return f"[{elapsed}s] {self.text}"
|
|
38
|
+
|
|
39
|
+
|
|
26
40
|
def sanitize_filename(title: str, max_length: int = 50) -> str:
|
|
27
41
|
"""
|
|
28
42
|
Sanitize title for use as filename.
|
|
@@ -87,15 +101,17 @@ def extract_content(input_path: str) -> Tuple[str, str]:
|
|
|
87
101
|
Tuple of (extracted_text, source_identifier)
|
|
88
102
|
"""
|
|
89
103
|
if input_path.startswith('http'):
|
|
90
|
-
|
|
91
|
-
|
|
104
|
+
with yaspin(text=TimedText(f"Extracting from URL (up to 60s): {input_path}")) as sp:
|
|
105
|
+
text = extract_from_url(input_path)
|
|
106
|
+
sp.ok(f"✓ Extracted {len(text)} characters")
|
|
92
107
|
source = input_path
|
|
93
108
|
else:
|
|
109
|
+
# Webarchive extraction is fast, no spinner needed
|
|
94
110
|
print(f"📄 Extracting from webarchive: {input_path}")
|
|
95
111
|
text = extract_from_webarchive(input_path)
|
|
112
|
+
print(f"✓ Extracted {len(text)} characters")
|
|
96
113
|
source = Path(input_path).name
|
|
97
114
|
|
|
98
|
-
print(f"✓ Extracted {len(text)} characters")
|
|
99
115
|
return text, source
|
|
100
116
|
|
|
101
117
|
|
|
@@ -221,8 +237,10 @@ Examples:
|
|
|
221
237
|
|
|
222
238
|
# Structurize with AI
|
|
223
239
|
provider = config.get("api_base_url", "API")
|
|
224
|
-
|
|
225
|
-
|
|
240
|
+
estimated = min(60 + len(raw_text) // 100, 600)
|
|
241
|
+
with yaspin(text=TimedText(f"Structurizing {len(raw_text)} chars with {provider} (~{estimated}s)")) as sp:
|
|
242
|
+
markdown = structurize_content(raw_text, config, source)
|
|
243
|
+
sp.ok("✓ Structurized")
|
|
226
244
|
|
|
227
245
|
# Determine output path
|
|
228
246
|
output_path = determine_output_path(args.input, markdown, config, args.output)
|
|
@@ -40,7 +40,7 @@ def _get_wait_time(platform: str) -> int:
|
|
|
40
40
|
return wait_times.get(platform, 2000)
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
def extract_from_url(url: str, timeout: int = 30000) -> str:
|
|
43
|
+
def extract_from_url(url: str, timeout: int = 60000) -> str:
|
|
44
44
|
"""
|
|
45
45
|
Extract text content from AI chat share URL.
|
|
46
46
|
|
|
@@ -70,7 +70,7 @@ def extract_from_url(url: str, timeout: int = 30000) -> str:
|
|
|
70
70
|
# Navigate with appropriate wait strategy
|
|
71
71
|
# Use 'load' for Gemini/Doubao (networkidle may timeout due to ongoing requests)
|
|
72
72
|
wait_strategy = 'load' if platform in ['gemini', 'doubao'] else 'networkidle'
|
|
73
|
-
page.goto(url, wait_until=wait_strategy, timeout=
|
|
73
|
+
page.goto(url, wait_until=wait_strategy, timeout=60000)
|
|
74
74
|
|
|
75
75
|
# Wait for content to load
|
|
76
76
|
# Try to wait for main selector (works for ChatGPT)
|
|
@@ -81,7 +81,9 @@ def structurize_content(
|
|
|
81
81
|
}
|
|
82
82
|
|
|
83
83
|
try:
|
|
84
|
-
|
|
84
|
+
# Dynamic timeout based on content size: 60s base + 1s per 100 chars, max 600s
|
|
85
|
+
estimated_timeout = min(60 + len(raw_text) // 100, 600)
|
|
86
|
+
response = requests.post(api_url, headers=headers, json=payload, timeout=estimated_timeout)
|
|
85
87
|
response.raise_for_status()
|
|
86
88
|
|
|
87
89
|
result = response.json()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: aichat2md
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Convert AI chat conversations to structured Markdown
|
|
5
5
|
Author: PlaceNameDay
|
|
6
6
|
License: MIT
|
|
@@ -24,6 +24,7 @@ Description-Content-Type: text/markdown
|
|
|
24
24
|
License-File: LICENSE
|
|
25
25
|
Requires-Dist: playwright>=1.40.0
|
|
26
26
|
Requires-Dist: requests>=2.31.0
|
|
27
|
+
Requires-Dist: yaspin>=3.0.0
|
|
27
28
|
Dynamic: license-file
|
|
28
29
|
|
|
29
30
|
# aichat2md
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "aichat2md"
|
|
7
|
-
version = "1.1.0"
|
|
7
|
+
version = "1.2.0"
|
|
8
8
|
description = "Convert AI chat conversations to structured Markdown"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.8"
|
|
@@ -28,6 +28,7 @@ classifiers = [
|
|
|
28
28
|
dependencies = [
|
|
29
29
|
"playwright>=1.40.0",
|
|
30
30
|
"requests>=2.31.0",
|
|
31
|
+
"yaspin>=3.0.0",
|
|
31
32
|
]
|
|
32
33
|
|
|
33
34
|
[project.scripts]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|