openconvert 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openconvert/__init__.py +7 -0
- openconvert/cli.py +145 -0
- openconvert/converter.py +152 -0
- openconvert/converters/__init__.py +3 -0
- openconvert/converters/archive_converter.py +277 -0
- openconvert/converters/audio_converter.py +223 -0
- openconvert/converters/code_converter.py +412 -0
- openconvert/converters/document_converter.py +596 -0
- openconvert/converters/image_converter.py +214 -0
- openconvert/converters/model_converter.py +208 -0
- openconvert/converters/video_converter.py +259 -0
- openconvert/launcher.py +0 -0
- openconvert-0.1.0.dist-info/METADATA +232 -0
- openconvert-0.1.0.dist-info/RECORD +17 -0
- openconvert-0.1.0.dist-info/WHEEL +5 -0
- openconvert-0.1.0.dist-info/entry_points.txt +2 -0
- openconvert-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,223 @@
|
|
1
|
+
"""
|
2
|
+
Audio converter module for handling audio format conversions.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import logging
|
6
|
+
import os
|
7
|
+
import tempfile
|
8
|
+
from pathlib import Path
|
9
|
+
from typing import Union, Optional, Dict, Any
|
10
|
+
|
11
|
+
logger = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
# Audio formats handled by this module. Every format can be converted to
# every other one, so the conversion table is derived instead of spelled out.
_AUDIO_FORMATS = ('mp3', 'wav', 'ogg', 'flac', 'aac')

# Maps each source format to the list of target formats it can become.
SUPPORTED_CONVERSIONS = {
    source: [target for target in _AUDIO_FORMATS if target != source]
    for source in _AUDIO_FORMATS
}
|
21
|
+
|
22
|
+
def convert(
    filepath: Union[str, Path],
    source_format: str,
    target_format: str,
    output_path: Union[str, Path],
    options: Optional[Dict[str, Any]] = None
) -> str:
    """
    Convert an audio file from one format to another.

    pydub is tried first. When pydub cannot be imported the conversion
    falls back to running the ffmpeg executable directly.

    Args:
        filepath: Path to the source audio file
        source_format: Source audio format
        target_format: Target audio format
        output_path: Path to save the converted audio
        options: Additional conversion options

    Returns:
        Path to the converted audio

    Raises:
        ValueError: If the conversion is not supported
        RuntimeError: If the conversion fails
    """
    if options is None:
        options = {}

    # Check if conversion is supported
    if target_format not in SUPPORTED_CONVERSIONS.get(source_format, []):
        raise ValueError(f"Conversion from {source_format} to {target_format} is not supported")

    filepath = Path(filepath)
    output_path = Path(output_path)

    try:
        # Try using pydub for audio conversion
        return _convert_with_pydub(filepath, source_format, target_format, output_path, options)
    except ImportError:
        logger.warning("pydub not installed. Trying alternative method...")
    except Exception as e:
        # Honour the documented contract: any conversion failure surfaces as
        # RuntimeError, with the original exception kept as the cause.
        logger.error(f"Error converting {filepath} to {target_format}: {str(e)}")
        raise RuntimeError(f"Failed to convert {filepath} to {target_format}: {str(e)}") from e

    # Try using ffmpeg directly
    return _convert_with_ffmpeg(filepath, source_format, target_format, output_path, options)
|
64
|
+
|
65
|
+
def speech_to_text(
    filepath: Union[str, Path],
    output_path: Union[str, Path],
    options: Optional[Dict[str, Any]] = None
) -> str:
    """
    Convert speech audio to text.

    The audio file is read in full, transcribed with the SpeechRecognition
    library's Google recognizer, and the transcript is written to
    ``output_path`` as UTF-8 text.

    Args:
        filepath: Path to the source audio file
        output_path: Path to save the text file
        options: Additional conversion options. Recognised keys:
            language: Language tag passed to the recognizer
                (default 'en-US')
            adjust_for_ambient_noise: When true, calibrate the recognizer
                on the start of the file before recording. The audio used
                for calibration is consumed and therefore NOT transcribed,
                so this is off by default.

    Returns:
        Path to the text file

    Raises:
        RuntimeError: If the conversion fails
    """
    if options is None:
        options = {}

    filepath = Path(filepath)
    output_path = Path(output_path)

    try:
        # Try using SpeechRecognition library
        import speech_recognition as sr
    except ImportError as e:
        logger.error("SpeechRecognition library is required for speech-to-text conversion")
        raise RuntimeError(
            "SpeechRecognition library is required for speech-to-text conversion. Please install it."
        ) from e

    try:
        # Initialize recognizer
        recognizer = sr.Recognizer()

        # Load the audio file
        with sr.AudioFile(str(filepath)) as source:
            # Calibration reads from the same stream that record() reads
            # next, so running it unconditionally drops the opening of the
            # recording from the transcript. Only do it when asked.
            if options.get('adjust_for_ambient_noise', False):
                recognizer.adjust_for_ambient_noise(source)

            # Record the audio
            audio_data = recognizer.record(source)

        # Recognize speech using Google Speech Recognition
        language = options.get('language', 'en-US')
        text = recognizer.recognize_google(audio_data, language=language)

        # Write the text to the output file
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(text)

        return str(output_path)

    except Exception as e:
        logger.error(f"Error in speech-to-text conversion: {str(e)}")
        raise RuntimeError(f"Failed to convert speech to text: {str(e)}") from e
|
122
|
+
|
123
|
+
def _convert_with_pydub(
    filepath: Path,
    source_format: str,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert audio using pydub library.

    Recognised option keys, applied in this order:
        volume: Gain adjustment in dB
        speed: Playback-rate multiplier (pitch changes with it)
        sample_rate: Target sample rate in Hz
        channels: 1 or 2; any other value is ignored
        trim: (start_ms, end_ms) slice of the audio to keep
        bitrate: Passed through to the exporter

    Args:
        filepath: Path to the source audio file
        source_format: Format name used to decode the source
        target_format: Format name of the desired output
        output_path: Path to write the converted audio to
        options: Conversion options (see above)

    Returns:
        The output path as a string.

    Raises:
        ImportError: If pydub is not installed; the caller relies on this
            to fall back to ffmpeg.
    """
    from pydub import AudioSegment

    # Load the audio file
    audio = AudioSegment.from_file(str(filepath), format=source_format)

    # Apply audio processing options
    if 'volume' in options:
        # Adjust volume (in dB)
        audio = audio + options['volume']

    if 'speed' in options:
        # Re-label the raw samples with a scaled frame rate so they play
        # faster or slower, then resample back to the original rate so the
        # exported file keeps a standard sample rate.
        speed = options['speed']
        original_rate = audio.frame_rate
        audio = audio._spawn(audio.raw_data, overrides={
            "frame_rate": int(original_rate * speed)
        })
        audio = audio.set_frame_rate(original_rate)

    if 'sample_rate' in options:
        # Change sample rate
        audio = audio.set_frame_rate(options['sample_rate'])

    if 'channels' in options:
        # Change number of channels (only mono and stereo are handled)
        channels = options['channels']
        if channels in (1, 2):
            audio = audio.set_channels(int(channels))

    if 'trim' in options:
        # Trim audio (start_ms, end_ms)
        start_ms, end_ms = options['trim']
        audio = audio[start_ms:end_ms]

    # Set export parameters
    export_params = {}

    if 'bitrate' in options:
        export_params['bitrate'] = options['bitrate']

    # pydub hands the format name to ffmpeg as the output muxer. ffmpeg has
    # no muxer called "aac"; raw AAC streams are written by the "adts" muxer.
    export_format = 'adts' if target_format == 'aac' else target_format

    # Export to target format
    audio.export(str(output_path), format=export_format, **export_params)

    return str(output_path)
|
174
|
+
|
175
|
+
def _convert_with_ffmpeg(
    filepath: Path,
    source_format: str,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert audio using ffmpeg directly.

    ``source_format`` and ``target_format`` are accepted for signature
    parity with the pydub backend but are not passed to ffmpeg; only the
    input and output paths are.

    Args:
        filepath: Path to the source audio file
        source_format: Source audio format (unused)
        target_format: Target audio format (unused)
        output_path: Path to write the converted audio to
        options: Conversion options; see ``_build_ffmpeg_command``

    Returns:
        The output path as a string.

    Raises:
        RuntimeError: If ffmpeg cannot be started or exits with an error.
    """
    import subprocess

    cmd = _build_ffmpeg_command(filepath, output_path, options)

    # Run ffmpeg
    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except (subprocess.SubprocessError, OSError) as e:
        # OSError covers a missing/unexecutable ffmpeg binary, which is not
        # a SubprocessError. CalledProcessError carries ffmpeg's stderr.
        detail = str(e)
        stderr = getattr(e, 'stderr', None)
        if stderr:
            detail = f"{detail}: {stderr.decode('utf-8', errors='replace').strip()}"
        logger.error(f"Error running ffmpeg: {detail}")
        raise RuntimeError(f"Failed to convert audio with ffmpeg: {detail}") from e

    return str(output_path)


def _build_ffmpeg_command(
    filepath: Path,
    output_path: Path,
    options: Dict[str, Any]
) -> list:
    """Build the ffmpeg argument list for an audio conversion.

    Recognised option keys:
        volume: Gain adjustment in dB
        speed: Tempo multiplier (ffmpeg ``atempo`` filter)
        sample_rate: Target sample rate in Hz
        channels: Number of output channels
        bitrate: Audio bitrate, e.g. '192k'
        trim: (start_ms, end_ms), the same meaning as in the pydub backend
    """
    # -y overwrites an existing output file instead of prompting/failing;
    # -loglevel error keeps captured stderr limited to real problems.
    cmd = ['ffmpeg', '-hide_banner', '-loglevel', 'error', '-y', '-i', str(filepath)]

    # ffmpeg honours only one -filter:a per output stream, so all audio
    # filters have to be joined into a single comma-separated chain.
    audio_filters = []
    if 'volume' in options:
        # Volume adjustment in dB
        audio_filters.append(f'volume={options["volume"]}dB')
    if 'speed' in options:
        # Speed adjustment
        audio_filters.append(f'atempo={options["speed"]}')
    if audio_filters:
        cmd.extend(['-filter:a', ','.join(audio_filters)])

    if 'sample_rate' in options:
        # Sample rate
        cmd.extend(['-ar', str(options['sample_rate'])])

    if 'channels' in options:
        # Number of channels
        cmd.extend(['-ac', str(options['channels'])])

    if 'bitrate' in options:
        # Bitrate (stringified so numeric values do not break subprocess)
        cmd.extend(['-b:a', str(options['bitrate'])])

    if 'trim' in options:
        # Same (start_ms, end_ms) convention as the pydub backend, converted
        # to the seconds-based start/duration that ffmpeg expects.
        start_ms, end_ms = options['trim']
        start_ms = 0 if start_ms is None else start_ms
        cmd.extend(['-ss', str(start_ms / 1000)])
        if end_ms is not None:
            cmd.extend(['-t', str((end_ms - start_ms) / 1000)])

    # Add output file
    cmd.append(str(output_path))
    return cmd
|
@@ -0,0 +1,412 @@
|
|
1
|
+
"""
|
2
|
+
Code and markup converter module for handling code and markup format conversions.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import logging
|
6
|
+
import os
|
7
|
+
import tempfile
|
8
|
+
from pathlib import Path
|
9
|
+
from typing import Union, Optional, Dict, Any
|
10
|
+
|
11
|
+
logger = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
# Conversion table: source format -> target formats it can be converted to.
SUPPORTED_CONVERSIONS = dict(
    json=['xml', 'yaml', 'csv', 'txt'],
    yaml=['json', 'xml', 'txt'],
    xml=['json', 'yaml', 'txt'],
    html=['md', 'txt', 'pdf'],
    md=['html', 'txt', 'pdf'],
    latex=['pdf', 'docx', 'html'],
)
|
22
|
+
|
23
|
+
def convert(
    filepath: Union[str, Path],
    source_format: str,
    target_format: str,
    output_path: Union[str, Path],
    options: Optional[Dict[str, Any]] = None
) -> str:
    """
    Convert a code or markup file to another format.

    The request is validated against SUPPORTED_CONVERSIONS and then handed
    to the converter registered for the source format.

    Args:
        filepath: Location of the file to convert
        source_format: Format of the input file
        target_format: Format to produce
        output_path: Where the converted file is written
        options: Extra settings forwarded to the format-specific converter

    Returns:
        Path to the converted file

    Raises:
        ValueError: If the source/target pair is not supported
        RuntimeError: If the format-specific converter fails
    """
    options = {} if options is None else options

    # Reject unsupported source/target pairs up front
    allowed_targets = SUPPORTED_CONVERSIONS.get(source_format, [])
    if target_format not in allowed_targets:
        raise ValueError(f"Conversion from {source_format} to {target_format} is not supported")

    filepath = Path(filepath)
    output_path = Path(output_path)

    try:
        # One converter per source format
        handlers = {
            'json': _convert_from_json,
            'yaml': _convert_from_yaml,
            'xml': _convert_from_xml,
            'html': _convert_from_html,
            'md': _convert_from_md,
            'latex': _convert_from_latex,
        }
        handler = handlers.get(source_format)
        if handler is None:
            raise ValueError(f"Unsupported source format: {source_format}")
        return handler(filepath, target_format, output_path, options)

    except Exception as err:
        logger.error(f"Error converting {filepath} to {target_format}: {str(err)}")
        raise RuntimeError(f"Failed to convert {filepath} to {target_format}: {str(err)}")
|
77
|
+
|
78
|
+
def _convert_from_json(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert from JSON to other formats.

    Args:
        filepath: Path to the UTF-8 encoded JSON source file
        target_format: One of 'xml', 'yaml', 'csv' or 'txt'
        output_path: Path the converted file is written to
        options: Conversion options; 'root_name' sets the XML root element
            name (default 'root')

    Returns:
        The output path as a string.

    Raises:
        RuntimeError: If the library needed for the target format is missing
        ValueError: If the JSON is not a list of objects (CSV target) or the
            target format is not handled here
    """
    import json

    # Read the JSON file
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if target_format == 'xml':
        try:
            import dicttoxml
        except ImportError as e:
            raise RuntimeError(
                "dicttoxml library is required for JSON to XML conversion. Please install it."
            ) from e

        # Convert to XML
        xml = dicttoxml.dicttoxml(data, custom_root=options.get('root_name', 'root'), attr_type=False)

        # Write to file (dicttoxml output is written in binary mode)
        with open(output_path, 'wb') as f:
            f.write(xml)

    elif target_format == 'yaml':
        try:
            import yaml
        except ImportError as e:
            raise RuntimeError(
                "PyYAML library is required for JSON to YAML conversion. Please install it."
            ) from e

        # Convert to YAML; allow_unicode keeps non-ASCII text readable
        # instead of emitting escape sequences.
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

    elif target_format == 'csv':
        import csv

        # Only a list of dictionaries maps onto rows and columns
        if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
            raise ValueError(
                "Error converting JSON to CSV: JSON must be a list of dictionaries to convert to CSV"
            )

        # Get all unique keys as CSV headers
        headers = sorted({key for item in data for key in item})

        # Nested containers are stored as JSON text so the cell stays
        # machine-readable (the csv module would otherwise write their
        # Python repr).
        rows = [
            {
                key: json.dumps(value, ensure_ascii=False) if isinstance(value, (dict, list)) else value
                for key, value in item.items()
            }
            for item in data
        ]

        # Write to CSV
        with open(output_path, 'w', encoding='utf-8', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=headers)
            writer.writeheader()
            writer.writerows(rows)

    elif target_format == 'txt':
        # Simple pretty-printed JSON to text; the file is UTF-8, so
        # non-ASCII characters are written as-is rather than \u-escaped.
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4, ensure_ascii=False)

    else:
        # Do not report success for a file that was never written
        raise ValueError(f"Unsupported target format for JSON source: {target_format}")

    return str(output_path)
|
145
|
+
|
146
|
+
def _convert_from_yaml(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert from YAML to other formats.

    Args:
        filepath: Path to the UTF-8 encoded YAML source file
        target_format: One of 'json', 'xml' or 'txt'
        output_path: Path the converted file is written to
        options: Conversion options; 'root_name' sets the XML root element
            name (default 'root')

    Returns:
        The output path as a string.

    Raises:
        RuntimeError: If PyYAML (or dicttoxml for the XML target) is missing
        ValueError: If the target format is not handled here
    """
    try:
        import yaml
    except ImportError as e:
        raise RuntimeError("PyYAML library is required for YAML conversions. Please install it.") from e

    # Read the YAML file (parsed for every target, so malformed YAML is
    # rejected even when the content is only copied)
    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    if target_format == 'json':
        import datetime
        import json

        def _json_default(value):
            # YAML timestamps are loaded as date/datetime objects, which the
            # json module cannot serialise on its own.
            if isinstance(value, datetime.date):
                return value.isoformat()
            raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

        # Convert to JSON, keeping non-ASCII text readable
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4, ensure_ascii=False, default=_json_default)

    elif target_format == 'xml':
        try:
            import dicttoxml
        except ImportError as e:
            # Name the library that is actually missing
            raise RuntimeError(
                "dicttoxml library is required for YAML to XML conversion. Please install it."
            ) from e

        # Convert to XML
        xml = dicttoxml.dicttoxml(data, custom_root=options.get('root_name', 'root'), attr_type=False)

        # Write to file
        with open(output_path, 'wb') as f:
            f.write(xml)

    elif target_format == 'txt':
        # Simple YAML to text (just copy the content)
        with open(filepath, 'r', encoding='utf-8') as f_in:
            with open(output_path, 'w', encoding='utf-8') as f_out:
                f_out.write(f_in.read())

    else:
        # Do not report success for a file that was never written
        raise ValueError(f"Unsupported target format for YAML source: {target_format}")

    return str(output_path)
|
187
|
+
|
188
|
+
def _convert_from_xml(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert from XML to other formats.

    Args:
        filepath: Path to the UTF-8 encoded XML source file
        target_format: One of 'json', 'yaml' or 'txt'
        output_path: Path the converted file is written to
        options: Conversion options (currently unused)

    Returns:
        The output path as a string.

    Raises:
        RuntimeError: If xmltodict (or PyYAML for the YAML target) is missing
        ValueError: If the target format is not handled here
    """
    try:
        import xmltodict
    except ImportError as e:
        raise RuntimeError("xmltodict library is required for XML conversions. Please install it.") from e

    import json

    # Read the XML file
    with open(filepath, 'r', encoding='utf-8') as f:
        xml_content = f.read()

    # Convert XML to dict (done for every target, so malformed XML is
    # rejected even when the content is only copied)
    data = xmltodict.parse(xml_content)

    if target_format == 'json':
        # Convert to JSON, keeping non-ASCII text readable
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4, ensure_ascii=False)

    elif target_format == 'yaml':
        try:
            import yaml
        except ImportError as e:
            # Name the library that is actually missing
            raise RuntimeError(
                "PyYAML library is required for XML to YAML conversion. Please install it."
            ) from e

        # Some xmltodict versions return OrderedDict, which yaml.dump would
        # serialise with a python-specific tag. A JSON round trip reduces the
        # tree to plain dict/list/str while preserving key order.
        plain = json.loads(json.dumps(data))

        # Convert to YAML
        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(plain, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

    elif target_format == 'txt':
        # Simple XML to text (just copy the content)
        with open(output_path, 'w', encoding='utf-8') as f_out:
            f_out.write(xml_content)

    else:
        # Do not report success for a file that was never written
        raise ValueError(f"Unsupported target format for XML source: {target_format}")

    return str(output_path)
|
229
|
+
|
230
|
+
def _convert_from_html(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Turn an HTML file into Markdown, plain text or PDF.

    Markdown is produced with html2text and plain text with BeautifulSoup;
    when the needed library cannot be imported, pandoc is used instead.
    PDF output always goes through pandoc. Any other target leaves the
    output untouched and still returns its path.

    Args:
        filepath: HTML file to read (UTF-8)
        target_format: 'md', 'txt' or 'pdf'
        output_path: File to write
        options: For 'md': 'ignore_links', 'ignore_images' (both default
            False) and 'body_width' (default 0)

    Returns:
        The output path as a string.
    """
    result = str(output_path)

    if target_format == 'pdf':
        # No Python-side renderer: delegate straight to pandoc
        _convert_using_pandoc(filepath, target_format, output_path)
        return result

    if target_format == 'md':
        try:
            import html2text

            with open(filepath, 'r', encoding='utf-8') as src:
                page = src.read()

            renderer = html2text.HTML2Text()
            renderer.ignore_links = options.get('ignore_links', False)
            renderer.ignore_images = options.get('ignore_images', False)
            renderer.body_width = options.get('body_width', 0)  # 0 means no wrapping

            rendered = renderer.handle(page)

            with open(output_path, 'w', encoding='utf-8') as dst:
                dst.write(rendered)
        except ImportError:
            # html2text unavailable: let pandoc do the conversion
            _convert_using_pandoc(filepath, target_format, output_path)
        return result

    if target_format == 'txt':
        try:
            from bs4 import BeautifulSoup

            with open(filepath, 'r', encoding='utf-8') as src:
                page = src.read()

            # Extract the document text, separating text nodes by blank lines
            plain = BeautifulSoup(page, 'html.parser').get_text(separator='\n\n')

            with open(output_path, 'w', encoding='utf-8') as dst:
                dst.write(plain)
        except ImportError:
            # BeautifulSoup unavailable: let pandoc do the conversion
            _convert_using_pandoc(filepath, target_format, output_path)

    return result
|
286
|
+
|
287
|
+
def _convert_from_md(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert from Markdown to other formats.

    Args:
        filepath: Path to the UTF-8 encoded Markdown source file
        target_format: One of 'html', 'txt' or 'pdf'
        output_path: Path the converted file is written to
        options: Conversion options (currently unused)

    Returns:
        The output path as a string.

    Raises:
        ValueError: If the target format is not handled here
    """
    if target_format == 'html':
        from html import escape

        try:
            import markdown

            # Read the Markdown file
            with open(filepath, 'r', encoding='utf-8') as f:
                md_content = f.read()

            # Convert to HTML
            body = markdown.markdown(md_content, extensions=['tables', 'fenced_code'])

            # The file name ends up inside <title>, so it must be escaped;
            # names containing & or < would otherwise produce broken markup.
            title = escape(filepath.stem)

            # Add HTML boilerplate
            document = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>{title}</title>
    <style>
        body {{ font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; max-width: 800px; margin: 0 auto; }}
        pre {{ background-color: #f5f5f5; padding: 10px; border-radius: 5px; overflow-x: auto; }}
        code {{ font-family: monospace; }}
        img {{ max-width: 100%; }}
        table {{ border-collapse: collapse; width: 100%; }}
        th, td {{ border: 1px solid #ddd; padding: 8px; }}
        th {{ background-color: #f2f2f2; }}
    </style>
</head>
<body>
    {body}
</body>
</html>"""

            # Write to file
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(document)

        except ImportError:
            # Alternative method using pandoc
            _convert_using_pandoc(filepath, target_format, output_path)

    elif target_format == 'txt':
        import re

        # Simple conversion - just strip markdown syntax
        with open(filepath, 'r', encoding='utf-8') as f:
            md_content = f.read()

        # Very basic markdown stripping, applied in this order:
        # Remove headers
        text = re.sub(r'^#+\s+', '', md_content, flags=re.MULTILINE)

        # Remove bold/italic
        text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
        text = re.sub(r'\*(.*?)\*', r'\1', text)

        # Remove links (keep the link text)
        text = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', text)

        # Remove code blocks (fence and content)
        text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(text)

    elif target_format == 'pdf':
        # Use pandoc for Markdown to PDF conversion
        _convert_using_pandoc(filepath, target_format, output_path)

    else:
        # Do not report success for a file that was never written
        raise ValueError(f"Unsupported target format for Markdown source: {target_format}")

    return str(output_path)
|
363
|
+
|
364
|
+
def _convert_from_latex(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert a LaTeX file by delegating to pandoc.

    Every LaTeX target goes through pandoc; ``options`` is accepted for
    signature parity with the other converters and is not used.

    Returns:
        The output path as a string.
    """
    destination = str(output_path)
    _convert_using_pandoc(filepath, target_format, output_path)
    return destination
|
374
|
+
|
375
|
+
def _convert_using_pandoc(
    filepath: Path,
    target_format: str,
    output_path: Path
) -> None:
    """Use pandoc for document conversion.

    The source format is derived from the input file's suffix; the target
    format is translated through the same name table.

    Args:
        filepath: Path to the source document
        target_format: Target format name as used by this package
        output_path: Path pandoc should write to

    Raises:
        RuntimeError: If pandoc cannot be started or exits with an error.
    """
    import subprocess

    # Map our format names to pandoc format names
    format_map = {
        'md': 'markdown',
        'html': 'html',
        'pdf': 'pdf',
        'docx': 'docx',
        'latex': 'latex',
        'txt': 'plain'
    }

    # File suffixes that differ from the format name they stand for
    suffix_aliases = {
        'tex': 'latex',
        'htm': 'html',
    }

    source_ext = filepath.suffix.lower().lstrip('.')
    source_ext = suffix_aliases.get(source_ext, source_ext)

    pandoc_source = format_map.get(source_ext, source_ext)
    pandoc_target = format_map.get(target_format, target_format)

    cmd = ['pandoc']
    if pandoc_source:
        # A file without a suffix gives no source format; leave it to pandoc
        cmd.extend(['-f', pandoc_source])
    cmd.extend([
        '-t', pandoc_target,
        '-o', str(output_path),
        str(filepath)
    ])

    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except OSError as e:
        # Raised when the pandoc executable is missing or cannot be run;
        # this is not a SubprocessError, so it needs its own handler.
        raise RuntimeError(f"Failed to convert using pandoc: {str(e)}. Please install pandoc.") from e
    except subprocess.SubprocessError as e:
        # pandoc ran but failed: surface its stderr, which explains why
        detail = str(e)
        stderr = getattr(e, 'stderr', None)
        if stderr:
            detail = f"{detail}: {stderr.decode('utf-8', errors='replace').strip()}"
        raise RuntimeError(f"Failed to convert using pandoc: {detail}") from e
|