openconvert 0.1.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openconvert/__init__.py +108 -3
- openconvert/__main__.py +13 -0
- openconvert/client.py +392 -0
- openconvert/openconvert_cli.py +543 -0
- openconvert-1.0.0.dist-info/METADATA +504 -0
- openconvert-1.0.0.dist-info/RECORD +10 -0
- openconvert-1.0.0.dist-info/entry_points.txt +2 -0
- openconvert-1.0.0.dist-info/licenses/LICENSE +21 -0
- openconvert/cli.py +0 -145
- openconvert/converter.py +0 -152
- openconvert/converters/__init__.py +0 -3
- openconvert/converters/archive_converter.py +0 -277
- openconvert/converters/audio_converter.py +0 -223
- openconvert/converters/code_converter.py +0 -412
- openconvert/converters/document_converter.py +0 -596
- openconvert/converters/image_converter.py +0 -214
- openconvert/converters/model_converter.py +0 -208
- openconvert/converters/video_converter.py +0 -259
- openconvert/launcher.py +0 -0
- openconvert-0.1.0.dist-info/METADATA +0 -232
- openconvert-0.1.0.dist-info/RECORD +0 -17
- openconvert-0.1.0.dist-info/entry_points.txt +0 -2
- {openconvert-0.1.0.dist-info → openconvert-1.0.0.dist-info}/WHEEL +0 -0
- {openconvert-0.1.0.dist-info → openconvert-1.0.0.dist-info}/top_level.txt +0 -0
@@ -1,412 +0,0 @@
|
|
1
|
-
"""
|
2
|
-
Code and markup converter module for handling code and markup format conversions.
|
3
|
-
"""
|
4
|
-
|
5
|
-
import logging
|
6
|
-
import os
|
7
|
-
import tempfile
|
8
|
-
from pathlib import Path
|
9
|
-
from typing import Union, Optional, Dict, Any
|
10
|
-
|
11
|
-
logger = logging.getLogger(__name__)

# Conversion table: each source format maps to the target formats that
# convert() accepts for it.
SUPPORTED_CONVERSIONS = {
    # Structured data formats
    "json": ["xml", "yaml", "csv", "txt"],
    "yaml": ["json", "xml", "txt"],
    "xml": ["json", "yaml", "txt"],
    # Markup formats
    "html": ["md", "txt", "pdf"],
    "md": ["html", "txt", "pdf"],
    "latex": ["pdf", "docx", "html"],
}
|
22
|
-
|
23
|
-
def convert(
    filepath: Union[str, Path],
    source_format: str,
    target_format: str,
    output_path: Union[str, Path],
    options: Optional[Dict[str, Any]] = None
) -> str:
    """
    Convert code or markup from one format to another.

    The (source_format, target_format) pair is validated against
    SUPPORTED_CONVERSIONS, then the work is delegated to the handler
    registered for the source format.

    Args:
        filepath: Path to the source file
        source_format: Source format (a key of SUPPORTED_CONVERSIONS)
        target_format: Target format
        output_path: Path to save the converted file
        options: Additional conversion options, passed through to the handler

    Returns:
        Path to the converted file

    Raises:
        ValueError: If the conversion is not supported, or no handler exists
            for the source format
        RuntimeError: If the conversion fails; the underlying exception is
            attached as ``__cause__``
    """
    if options is None:
        options = {}

    # Check if conversion is supported
    if target_format not in SUPPORTED_CONVERSIONS.get(source_format, []):
        raise ValueError(f"Conversion from {source_format} to {target_format} is not supported")

    # Dispatch table: source format -> handler function.
    handlers = {
        'json': _convert_from_json,
        'yaml': _convert_from_yaml,
        'xml': _convert_from_xml,
        'html': _convert_from_html,
        'md': _convert_from_md,
        'latex': _convert_from_latex,
    }

    # Resolved outside the try block so this ValueError reaches the caller
    # as documented instead of being re-wrapped as RuntimeError below.
    handler = handlers.get(source_format)
    if handler is None:
        raise ValueError(f"Unsupported source format: {source_format}")

    filepath = Path(filepath)
    output_path = Path(output_path)

    try:
        return handler(filepath, target_format, output_path, options)
    except Exception as e:
        logger.error("Error converting %s to %s: %s", filepath, target_format, e)
        # Chain the original exception so its traceback is preserved.
        raise RuntimeError(f"Failed to convert {filepath} to {target_format}: {str(e)}") from e
|
77
|
-
|
78
|
-
def _convert_from_json(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert from JSON to other formats.

    Args:
        filepath: Path to the JSON source file (read as UTF-8)
        target_format: One of 'xml', 'yaml', 'csv' or 'txt'
        output_path: Path the converted file is written to
        options: Conversion options; 'root_name' sets the XML root element
            name (default 'root')

    Returns:
        The output path as a string

    Raises:
        ValueError: If target_format is not handled here, or if the JSON is
            not a list of dictionaries when converting to CSV
        RuntimeError: If an optional library (dicttoxml, PyYAML) is missing
    """
    import json

    # Reject unknown targets up front; otherwise every branch is skipped and
    # a path to a file that was never written is returned.
    if target_format not in ('xml', 'yaml', 'csv', 'txt'):
        raise ValueError(f"Unsupported target format for JSON conversion: {target_format}")

    # Read the JSON file
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if target_format == 'xml':
        # Keep the try narrow: only the import decides "library missing".
        try:
            import dicttoxml
        except ImportError as e:
            raise RuntimeError("dicttoxml library is required for JSON to XML conversion. Please install it.") from e

        xml = dicttoxml.dicttoxml(data, custom_root=options.get('root_name', 'root'), attr_type=False)

        # The dicttoxml result is written in binary mode
        with open(output_path, 'wb') as f:
            f.write(xml)

    elif target_format == 'yaml':
        try:
            import yaml
        except ImportError as e:
            raise RuntimeError("PyYAML library is required for JSON to YAML conversion. Please install it.") from e

        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(data, f, default_flow_style=False, sort_keys=False)

    elif target_format == 'csv':
        import csv

        try:
            # Only a list of dictionaries maps onto CSV rows
            if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
                raise ValueError("JSON must be a list of dictionaries to convert to CSV")

            # Union of all keys, sorted, becomes the header row
            headers = sorted({key for item in data for key in item})

            # newline='' lets the csv module control line endings
            with open(output_path, 'w', encoding='utf-8', newline='') as f:
                writer = csv.DictWriter(f, fieldnames=headers)
                writer.writeheader()
                writer.writerows(data)

        except ValueError as e:
            raise ValueError(f"Error converting JSON to CSV: {str(e)}") from e

    else:
        # 'txt': simple pretty-printed JSON
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

    return str(output_path)
|
145
|
-
|
146
|
-
def _convert_from_yaml(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert from YAML to other formats.

    The source is always parsed with ``yaml.safe_load`` first, including for
    the 'txt' target, whose output is a plain copy of the source text.

    Args:
        filepath: Path to the YAML source file (read as UTF-8)
        target_format: One of 'json', 'xml' or 'txt'
        output_path: Path the converted file is written to
        options: Conversion options; 'root_name' sets the XML root element
            name (default 'root')

    Returns:
        The output path as a string

    Raises:
        ValueError: If target_format is not handled here
        RuntimeError: If PyYAML, or dicttoxml for the XML target, is missing
    """
    # Reject unknown targets up front; otherwise every branch is skipped and
    # a path to a file that was never written is returned.
    if target_format not in ('json', 'xml', 'txt'):
        raise ValueError(f"Unsupported target format for YAML conversion: {target_format}")

    try:
        import yaml
    except ImportError as e:
        raise RuntimeError("PyYAML library is required for YAML conversions. Please install it.") from e

    # Read the YAML file
    with open(filepath, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    if target_format == 'json':
        import json

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

    elif target_format == 'xml':
        # Imported separately so a missing dicttoxml is not reported as a
        # missing PyYAML.
        try:
            import dicttoxml
        except ImportError as e:
            raise RuntimeError("dicttoxml library is required for YAML to XML conversion. Please install it.") from e

        xml = dicttoxml.dicttoxml(data, custom_root=options.get('root_name', 'root'), attr_type=False)

        # The dicttoxml result is written in binary mode
        with open(output_path, 'wb') as f:
            f.write(xml)

    else:
        # 'txt': simple YAML to text (just copy the content)
        with open(filepath, 'r', encoding='utf-8') as f_in:
            with open(output_path, 'w', encoding='utf-8') as f_out:
                f_out.write(f_in.read())

    return str(output_path)
|
187
|
-
|
188
|
-
def _convert_from_xml(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert from XML to other formats.

    The source is always parsed with ``xmltodict.parse`` first, including for
    the 'txt' target, whose output is a plain copy of the source text.

    Args:
        filepath: Path to the XML source file (read as UTF-8)
        target_format: One of 'json', 'yaml' or 'txt'
        output_path: Path the converted file is written to
        options: Conversion options (not used by this converter)

    Returns:
        The output path as a string

    Raises:
        ValueError: If target_format is not handled here
        RuntimeError: If xmltodict, or PyYAML for the YAML target, is missing
    """
    # Reject unknown targets up front; otherwise every branch is skipped and
    # a path to a file that was never written is returned.
    if target_format not in ('json', 'yaml', 'txt'):
        raise ValueError(f"Unsupported target format for XML conversion: {target_format}")

    try:
        import xmltodict
    except ImportError as e:
        raise RuntimeError("xmltodict library is required for XML conversions. Please install it.") from e

    # Read the XML file
    with open(filepath, 'r', encoding='utf-8') as f:
        xml_content = f.read()

    # Convert XML to dict
    data = xmltodict.parse(xml_content)

    if target_format == 'json':
        import json

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=4)

    elif target_format == 'yaml':
        # Imported separately so a missing PyYAML is not reported as a
        # missing xmltodict.
        try:
            import yaml
        except ImportError as e:
            raise RuntimeError("PyYAML library is required for XML to YAML conversion. Please install it.") from e

        with open(output_path, 'w', encoding='utf-8') as f:
            yaml.dump(data, f, default_flow_style=False, sort_keys=False)

    else:
        # 'txt': simple XML to text (just copy the content already read)
        with open(output_path, 'w', encoding='utf-8') as f_out:
            f_out.write(xml_content)

    return str(output_path)
|
229
|
-
|
230
|
-
def _convert_from_html(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert from HTML to other formats.

    Markdown output uses html2text and plain-text output uses BeautifulSoup.
    If the library for a target cannot be imported, the conversion falls
    back to pandoc. PDF output always goes through pandoc.

    Args:
        filepath: Path to the HTML source file (read as UTF-8)
        target_format: One of 'md', 'txt' or 'pdf'
        output_path: Path the converted file is written to
        options: Conversion options; for Markdown output 'ignore_links',
            'ignore_images' (both default False) and 'body_width'
            (default 0) are forwarded to html2text

    Returns:
        The output path as a string

    Raises:
        ValueError: If target_format is not handled here
        RuntimeError: If the pandoc conversion fails
    """
    # Reject unknown targets up front; otherwise every branch is skipped and
    # a path to a file that was never written is returned.
    if target_format not in ('md', 'txt', 'pdf'):
        raise ValueError(f"Unsupported target format for HTML conversion: {target_format}")

    if target_format == 'md':
        # Keep the try narrow so only a missing library triggers the fallback.
        try:
            import html2text
        except ImportError:
            # Alternative method using pandoc
            _convert_using_pandoc(filepath, target_format, output_path)
            return str(output_path)

        # Read the HTML file
        with open(filepath, 'r', encoding='utf-8') as f:
            html_content = f.read()

        # Convert to Markdown
        h = html2text.HTML2Text()
        h.ignore_links = options.get('ignore_links', False)
        h.ignore_images = options.get('ignore_images', False)
        h.body_width = options.get('body_width', 0)  # 0 means no wrapping

        markdown = h.handle(html_content)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(markdown)

    elif target_format == 'txt':
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            # Alternative method using pandoc
            _convert_using_pandoc(filepath, target_format, output_path)
            return str(output_path)

        # Read the HTML file
        with open(filepath, 'r', encoding='utf-8') as f:
            html_content = f.read()

        # Parse HTML and extract text
        soup = BeautifulSoup(html_content, 'html.parser')
        text = soup.get_text(separator='\n\n')

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(text)

    else:
        # 'pdf': use pandoc for HTML to PDF conversion
        _convert_using_pandoc(filepath, target_format, output_path)

    return str(output_path)
|
286
|
-
|
287
|
-
def _convert_from_md(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert from Markdown to other formats.

    HTML output uses the ``markdown`` package and falls back to pandoc when
    it cannot be imported. Plain-text output is a basic regex-based strip of
    Markdown syntax. PDF output always goes through pandoc.

    Args:
        filepath: Path to the Markdown source file (read as UTF-8)
        target_format: One of 'html', 'txt' or 'pdf'
        output_path: Path the converted file is written to
        options: Conversion options (not used by this converter)

    Returns:
        The output path as a string

    Raises:
        ValueError: If target_format is not handled here
        RuntimeError: If the pandoc conversion fails
    """
    # Reject unknown targets up front; otherwise every branch is skipped and
    # a path to a file that was never written is returned.
    if target_format not in ('html', 'txt', 'pdf'):
        raise ValueError(f"Unsupported target format for Markdown conversion: {target_format}")

    if target_format == 'html':
        # Keep the try narrow so only a missing library triggers the fallback.
        try:
            import markdown
        except ImportError:
            # Alternative method using pandoc
            _convert_using_pandoc(filepath, target_format, output_path)
            return str(output_path)

        from html import escape

        # Read the Markdown file
        with open(filepath, 'r', encoding='utf-8') as f:
            md_content = f.read()

        # Convert to HTML
        body_html = markdown.markdown(md_content, extensions=['tables', 'fenced_code'])

        # The file stem goes into <title>; escape it so names containing
        # '<', '>' or '&' cannot break the generated document.
        title = escape(filepath.stem)

        # Add HTML boilerplate
        document = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{title}</title>
<style>
body {{ font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; max-width: 800px; margin: 0 auto; }}
pre {{ background-color: #f5f5f5; padding: 10px; border-radius: 5px; overflow-x: auto; }}
code {{ font-family: monospace; }}
img {{ max-width: 100%; }}
table {{ border-collapse: collapse; width: 100%; }}
th, td {{ border: 1px solid #ddd; padding: 8px; }}
th {{ background-color: #f2f2f2; }}
</style>
</head>
<body>
{body_html}
</body>
</html>"""

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(document)

    elif target_format == 'txt':
        import re

        # Simple conversion - just strip markdown syntax
        with open(filepath, 'r', encoding='utf-8') as f:
            md_content = f.read()

        # Remove header markers
        text = re.sub(r'^#+\s+', '', md_content, flags=re.MULTILINE)

        # Remove bold/italic markers (bold first, so '**' is not consumed
        # as two italic markers)
        text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
        text = re.sub(r'\*(.*?)\*', r'\1', text)

        # Replace links and images with their text; the optional '!' keeps
        # image syntax from leaving a stray '!' behind.
        text = re.sub(r'!?\[(.*?)\]\(.*?\)', r'\1', text)

        # Remove fenced code blocks entirely
        text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(text)

    else:
        # 'pdf': use pandoc for Markdown to PDF conversion
        _convert_using_pandoc(filepath, target_format, output_path)

    return str(output_path)
|
363
|
-
|
364
|
-
def _convert_from_latex(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert a LaTeX file by delegating to pandoc.

    Every LaTeX conversion is handed to ``_convert_using_pandoc``; the
    ``options`` argument is accepted for signature parity with the other
    converters and is not used.

    Returns:
        The output path as a string
    """
    _convert_using_pandoc(
        filepath,
        target_format,
        output_path,
    )
    converted = str(output_path)
    return converted
|
374
|
-
|
375
|
-
def _convert_using_pandoc(
    filepath: Path,
    target_format: str,
    output_path: Path
) -> None:
    """Use pandoc for document conversion.

    The pandoc input format is derived from the source file's extension
    ('.tex' is treated as LaTeX) and the output format from target_format.
    Names missing from the internal map are passed to pandoc unchanged.

    Args:
        filepath: Path to the source file
        target_format: Target format name as used by this module
        output_path: Path pandoc writes the result to

    Raises:
        RuntimeError: If pandoc cannot be started (for example it is not
            installed) or exits with a non-zero status; the underlying
            exception is attached as ``__cause__``
    """
    # Imported outside the try block: the except clause below references
    # subprocess, so the name must be bound before the handler is evaluated.
    import subprocess

    # Map our format names to pandoc format names
    format_map = {
        'md': 'markdown',
        'html': 'html',
        'pdf': 'pdf',
        'docx': 'docx',
        'latex': 'latex',
        'txt': 'plain'
    }

    source_ext = filepath.suffix.lower().lstrip('.')
    if source_ext == 'tex':
        source_ext = 'latex'

    pandoc_source = format_map.get(source_ext, source_ext)
    pandoc_target = format_map.get(target_format, target_format)

    cmd = [
        'pandoc',
        '-f', pandoc_source,
        '-t', pandoc_target,
        '-o', str(output_path),
        str(filepath)
    ]

    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except (OSError, subprocess.SubprocessError) as e:
        # OSError covers FileNotFoundError, which subprocess.run raises when
        # the pandoc executable is missing. That is not a SubprocessError.
        detail = str(e)
        stderr = getattr(e, 'stderr', None)
        if stderr:
            # Surface pandoc's own diagnostics instead of discarding them.
            detail = f"{detail}: {stderr.decode('utf-8', errors='replace').strip()}"
        raise RuntimeError(f"Failed to convert using pandoc: {detail}. Please install pandoc.") from e
|