openconvert 0.1.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,412 +0,0 @@
1
- """
2
- Code and markup converter module for handling code and markup format conversions.
3
- """
4
-
5
- import logging
6
- import os
7
- import tempfile
8
- from pathlib import Path
9
- from typing import Union, Optional, Dict, Any
10
-
11
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Supported conversions: each key is a source format and each value is the
# list of target formats that convert() accepts for that source. convert()
# rejects any (source, target) pair not listed here with a ValueError.
SUPPORTED_CONVERSIONS = {
    'json': ['xml', 'yaml', 'csv', 'txt'],
    'yaml': ['json', 'xml', 'txt'],
    'xml': ['json', 'yaml', 'txt'],
    'html': ['md', 'txt', 'pdf'],
    'md': ['html', 'txt', 'pdf'],
    'latex': ['pdf', 'docx', 'html']
}
22
-
23
def convert(
    filepath: Union[str, Path],
    source_format: str,
    target_format: str,
    output_path: Union[str, Path],
    options: Optional[Dict[str, Any]] = None
) -> str:
    """
    Convert code or markup from one format to another.

    Args:
        filepath: Path to the source file
        source_format: Source format (a key of SUPPORTED_CONVERSIONS)
        target_format: Target format (must be listed for source_format)
        output_path: Path to save the converted file
        options: Additional conversion options, forwarded to the
            format-specific converter

    Returns:
        Path to the converted file

    Raises:
        ValueError: If the conversion is not supported
        RuntimeError: If the conversion fails; the underlying exception is
            attached as ``__cause__``
    """
    if options is None:
        options = {}

    # Check if conversion is supported
    if target_format not in SUPPORTED_CONVERSIONS.get(source_format, []):
        raise ValueError(f"Conversion from {source_format} to {target_format} is not supported")

    filepath = Path(filepath)
    output_path = Path(output_path)

    # Route each source format to its converter.
    handlers = {
        'json': _convert_from_json,
        'yaml': _convert_from_yaml,
        'xml': _convert_from_xml,
        'html': _convert_from_html,
        'md': _convert_from_md,
        'latex': _convert_from_latex,
    }

    try:
        handler = handlers.get(source_format)
        if handler is None:
            # Only reachable if SUPPORTED_CONVERSIONS lists a format that has
            # no converter; reported as RuntimeError like any other failure.
            raise ValueError(f"Unsupported source format: {source_format}")
        return handler(filepath, target_format, output_path, options)

    except Exception as e:
        logger.error("Error converting %s to %s: %s", filepath, target_format, e)
        # Chain the original exception so its traceback is not lost.
        raise RuntimeError(f"Failed to convert {filepath} to {target_format}: {str(e)}") from e
77
-
78
- def _convert_from_json(
79
- filepath: Path,
80
- target_format: str,
81
- output_path: Path,
82
- options: Dict[str, Any]
83
- ) -> str:
84
- """Convert from JSON to other formats."""
85
- import json
86
-
87
- # Read the JSON file
88
- with open(filepath, 'r', encoding='utf-8') as f:
89
- data = json.load(f)
90
-
91
- if target_format == 'xml':
92
- try:
93
- import dicttoxml
94
-
95
- # Convert to XML
96
- xml = dicttoxml.dicttoxml(data, custom_root=options.get('root_name', 'root'), attr_type=False)
97
-
98
- # Write to file
99
- with open(output_path, 'wb') as f:
100
- f.write(xml)
101
-
102
- except ImportError:
103
- raise RuntimeError("dicttoxml library is required for JSON to XML conversion. Please install it.")
104
-
105
- elif target_format == 'yaml':
106
- try:
107
- import yaml
108
-
109
- # Convert to YAML
110
- with open(output_path, 'w', encoding='utf-8') as f:
111
- yaml.dump(data, f, default_flow_style=False, sort_keys=False)
112
-
113
- except ImportError:
114
- raise RuntimeError("PyYAML library is required for JSON to YAML conversion. Please install it.")
115
-
116
- elif target_format == 'csv':
117
- try:
118
- import csv
119
-
120
- # Check if the JSON is a list of dictionaries (suitable for CSV)
121
- if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
122
- raise ValueError("JSON must be a list of dictionaries to convert to CSV")
123
-
124
- # Get all unique keys as CSV headers
125
- headers = set()
126
- for item in data:
127
- headers.update(item.keys())
128
- headers = sorted(headers)
129
-
130
- # Write to CSV
131
- with open(output_path, 'w', encoding='utf-8', newline='') as f:
132
- writer = csv.DictWriter(f, fieldnames=headers)
133
- writer.writeheader()
134
- writer.writerows(data)
135
-
136
- except ValueError as e:
137
- raise ValueError(f"Error converting JSON to CSV: {str(e)}")
138
-
139
- elif target_format == 'txt':
140
- # Simple pretty-printed JSON to text
141
- with open(output_path, 'w', encoding='utf-8') as f:
142
- json.dump(data, f, indent=4)
143
-
144
- return str(output_path)
145
-
146
- def _convert_from_yaml(
147
- filepath: Path,
148
- target_format: str,
149
- output_path: Path,
150
- options: Dict[str, Any]
151
- ) -> str:
152
- """Convert from YAML to other formats."""
153
- try:
154
- import yaml
155
-
156
- # Read the YAML file
157
- with open(filepath, 'r', encoding='utf-8') as f:
158
- data = yaml.safe_load(f)
159
-
160
- if target_format == 'json':
161
- import json
162
-
163
- # Convert to JSON
164
- with open(output_path, 'w', encoding='utf-8') as f:
165
- json.dump(data, f, indent=4)
166
-
167
- elif target_format == 'xml':
168
- import dicttoxml
169
-
170
- # Convert to XML
171
- xml = dicttoxml.dicttoxml(data, custom_root=options.get('root_name', 'root'), attr_type=False)
172
-
173
- # Write to file
174
- with open(output_path, 'wb') as f:
175
- f.write(xml)
176
-
177
- elif target_format == 'txt':
178
- # Simple YAML to text (just copy the content)
179
- with open(filepath, 'r', encoding='utf-8') as f_in:
180
- with open(output_path, 'w', encoding='utf-8') as f_out:
181
- f_out.write(f_in.read())
182
-
183
- except ImportError:
184
- raise RuntimeError("PyYAML library is required for YAML conversions. Please install it.")
185
-
186
- return str(output_path)
187
-
188
- def _convert_from_xml(
189
- filepath: Path,
190
- target_format: str,
191
- output_path: Path,
192
- options: Dict[str, Any]
193
- ) -> str:
194
- """Convert from XML to other formats."""
195
- try:
196
- import xmltodict
197
-
198
- # Read the XML file
199
- with open(filepath, 'r', encoding='utf-8') as f:
200
- xml_content = f.read()
201
-
202
- # Convert XML to dict
203
- data = xmltodict.parse(xml_content)
204
-
205
- if target_format == 'json':
206
- import json
207
-
208
- # Convert to JSON
209
- with open(output_path, 'w', encoding='utf-8') as f:
210
- json.dump(data, f, indent=4)
211
-
212
- elif target_format == 'yaml':
213
- import yaml
214
-
215
- # Convert to YAML
216
- with open(output_path, 'w', encoding='utf-8') as f:
217
- yaml.dump(data, f, default_flow_style=False, sort_keys=False)
218
-
219
- elif target_format == 'txt':
220
- # Simple XML to text (just copy the content)
221
- with open(filepath, 'r', encoding='utf-8') as f_in:
222
- with open(output_path, 'w', encoding='utf-8') as f_out:
223
- f_out.write(f_in.read())
224
-
225
- except ImportError:
226
- raise RuntimeError("xmltodict library is required for XML conversions. Please install it.")
227
-
228
- return str(output_path)
229
-
230
def _convert_from_html(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Turn an HTML file into Markdown, plain text or PDF.

    Markdown is produced with html2text and plain text with BeautifulSoup;
    when the relevant library cannot be imported the work is handed to
    pandoc instead. PDF output always goes through pandoc. Any other target
    format writes nothing.

    Args:
        filepath: Path to the HTML source file (read as UTF-8).
        target_format: 'md', 'txt' or 'pdf'.
        output_path: Path the converted file is written to.
        options: For the 'md' target, 'ignore_links', 'ignore_images' and
            'body_width' are passed on to html2text.

    Returns:
        The output path as a string.
    """
    if target_format == 'pdf':
        # PDF is always delegated to pandoc
        _convert_using_pandoc(filepath, target_format, output_path)

    elif target_format == 'md':
        try:
            import html2text

            with open(filepath, 'r', encoding='utf-8') as src:
                page = src.read()

            renderer = html2text.HTML2Text()
            renderer.ignore_links = options.get('ignore_links', False)
            renderer.ignore_images = options.get('ignore_images', False)
            # A body width of 0 disables line wrapping
            renderer.body_width = options.get('body_width', 0)

            rendered = renderer.handle(page)

            with open(output_path, 'w', encoding='utf-8') as dst:
                dst.write(rendered)

        except ImportError:
            # html2text unavailable: let pandoc do the conversion
            _convert_using_pandoc(filepath, target_format, output_path)

    elif target_format == 'txt':
        try:
            from bs4 import BeautifulSoup

            with open(filepath, 'r', encoding='utf-8') as src:
                page = src.read()

            # Pull out the text nodes, separated by blank lines
            plain = BeautifulSoup(page, 'html.parser').get_text(separator='\n\n')

            with open(output_path, 'w', encoding='utf-8') as dst:
                dst.write(plain)

        except ImportError:
            # BeautifulSoup unavailable: let pandoc do the conversion
            _convert_using_pandoc(filepath, target_format, output_path)

    return str(output_path)
286
-
287
- def _convert_from_md(
288
- filepath: Path,
289
- target_format: str,
290
- output_path: Path,
291
- options: Dict[str, Any]
292
- ) -> str:
293
- """Convert from Markdown to other formats."""
294
- if target_format == 'html':
295
- try:
296
- import markdown
297
-
298
- # Read the Markdown file
299
- with open(filepath, 'r', encoding='utf-8') as f:
300
- md_content = f.read()
301
-
302
- # Convert to HTML
303
- html = markdown.markdown(md_content, extensions=['tables', 'fenced_code'])
304
-
305
- # Add HTML boilerplate
306
- html = f"""<!DOCTYPE html>
307
- <html>
308
- <head>
309
- <meta charset="utf-8">
310
- <title>{filepath.stem}</title>
311
- <style>
312
- body {{ font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; max-width: 800px; margin: 0 auto; }}
313
- pre {{ background-color: #f5f5f5; padding: 10px; border-radius: 5px; overflow-x: auto; }}
314
- code {{ font-family: monospace; }}
315
- img {{ max-width: 100%; }}
316
- table {{ border-collapse: collapse; width: 100%; }}
317
- th, td {{ border: 1px solid #ddd; padding: 8px; }}
318
- th {{ background-color: #f2f2f2; }}
319
- </style>
320
- </head>
321
- <body>
322
- {html}
323
- </body>
324
- </html>"""
325
-
326
- # Write to file
327
- with open(output_path, 'w', encoding='utf-8') as f:
328
- f.write(html)
329
-
330
- except ImportError:
331
- # Alternative method using pandoc
332
- _convert_using_pandoc(filepath, target_format, output_path)
333
-
334
- elif target_format == 'txt':
335
- # Simple conversion - just strip markdown syntax
336
- with open(filepath, 'r', encoding='utf-8') as f:
337
- md_content = f.read()
338
-
339
- # Very basic markdown stripping
340
- import re
341
-
342
- # Remove headers
343
- text = re.sub(r'^#+\s+', '', md_content, flags=re.MULTILINE)
344
-
345
- # Remove bold/italic
346
- text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
347
- text = re.sub(r'\*(.*?)\*', r'\1', text)
348
-
349
- # Remove links
350
- text = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', text)
351
-
352
- # Remove code blocks
353
- text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)
354
-
355
- with open(output_path, 'w', encoding='utf-8') as f:
356
- f.write(text)
357
-
358
- elif target_format == 'pdf':
359
- # Use pandoc for Markdown to PDF conversion
360
- _convert_using_pandoc(filepath, target_format, output_path)
361
-
362
- return str(output_path)
363
-
364
def _convert_from_latex(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Turn a LaTeX file into the requested target format via pandoc.

    Every LaTeX conversion is delegated to pandoc; ``options`` is accepted
    for signature parity with the other converters and is not used.

    Returns:
        The output path as a string.
    """
    _convert_using_pandoc(filepath, target_format, output_path)
    converted_path = str(output_path)
    return converted_path
374
-
375
- def _convert_using_pandoc(
376
- filepath: Path,
377
- target_format: str,
378
- output_path: Path
379
- ) -> None:
380
- """Use pandoc for document conversion."""
381
- try:
382
- import subprocess
383
-
384
- # Map our format names to pandoc format names
385
- format_map = {
386
- 'md': 'markdown',
387
- 'html': 'html',
388
- 'pdf': 'pdf',
389
- 'docx': 'docx',
390
- 'latex': 'latex',
391
- 'txt': 'plain'
392
- }
393
-
394
- source_ext = filepath.suffix.lower().lstrip('.')
395
- if source_ext == 'tex':
396
- source_ext = 'latex'
397
-
398
- pandoc_source = format_map.get(source_ext, source_ext)
399
- pandoc_target = format_map.get(target_format, target_format)
400
-
401
- cmd = [
402
- 'pandoc',
403
- '-f', pandoc_source,
404
- '-t', pandoc_target,
405
- '-o', str(output_path),
406
- str(filepath)
407
- ]
408
-
409
- subprocess.run(cmd, check=True, capture_output=True)
410
-
411
- except (ImportError, subprocess.SubprocessError) as e:
412
- raise RuntimeError(f"Failed to convert using pandoc: {str(e)}. Please install pandoc.")