openconvert 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,223 @@
1
+ """
2
+ Audio converter module for handling audio format conversions.
3
+ """
4
+
5
+ import logging
6
+ import os
7
+ import tempfile
8
+ from pathlib import Path
9
+ from typing import Union, Optional, Dict, Any
10
+
11
logger = logging.getLogger(__name__)

# Every supported audio format can be converted to every other one, so the
# table is derived from a single ordered list. The list order fixes both the
# key order and the order of each target list.
_AUDIO_FORMATS = ['mp3', 'wav', 'ogg', 'flac', 'aac']
SUPPORTED_CONVERSIONS = {
    source: [target for target in _AUDIO_FORMATS if target != source]
    for source in _AUDIO_FORMATS
}
21
+
22
def convert(
    filepath: Union[str, Path],
    source_format: str,
    target_format: str,
    output_path: Union[str, Path],
    options: Optional[Dict[str, Any]] = None
) -> str:
    """
    Transcode an audio file into another supported audio format.

    The pydub backend is tried first; if importing pydub fails, the
    conversion is handed to the ffmpeg command-line backend instead.

    Args:
        filepath: Path to the source audio file
        source_format: Source audio format
        target_format: Target audio format
        output_path: Path to save the converted audio
        options: Additional conversion options

    Returns:
        Path to the converted audio

    Raises:
        ValueError: If the conversion is not supported
        RuntimeError: If the conversion fails
    """
    conversion_options = {} if options is None else options

    # Reject pairs that are not listed in the conversion table up front.
    allowed_targets = SUPPORTED_CONVERSIONS.get(source_format, [])
    if target_format not in allowed_targets:
        raise ValueError(f"Conversion from {source_format} to {target_format} is not supported")

    source_file = Path(filepath)
    destination = Path(output_path)

    try:
        converted = _convert_with_pydub(
            source_file, source_format, target_format, destination, conversion_options
        )
    except ImportError:
        # Only a missing pydub triggers the fallback; other errors propagate.
        logger.warning("pydub not installed. Trying alternative method...")
    else:
        return converted

    return _convert_with_ffmpeg(
        source_file, source_format, target_format, destination, conversion_options
    )
64
+
65
def speech_to_text(
    filepath: Union[str, Path],
    output_path: Union[str, Path],
    options: Optional[Dict[str, Any]] = None
) -> str:
    """
    Convert speech audio to text.

    The audio file is read with the SpeechRecognition library, transcribed
    with its Google recognizer and the transcript is written to output_path
    as UTF-8 text.

    Args:
        filepath: Path to the source audio file
        output_path: Path to save the text file
        options: Additional conversion options. Recognised keys:
            language: Language tag passed to the recognizer (default 'en-US')
            adjust_for_ambient_noise: When true, calibrate on the start of
                the file before recording (default False). The calibration
                consumes the audio it listens to, so that leading part of
                the recording is not transcribed.

    Returns:
        Path to the text file

    Raises:
        RuntimeError: If SpeechRecognition is not installed or the
            conversion fails
    """
    if options is None:
        options = {}

    filepath = Path(filepath)
    output_path = Path(output_path)

    # Keep the import in its own try so that only a genuinely missing library
    # produces the "please install" message.
    try:
        import speech_recognition as sr
    except ImportError as exc:
        logger.error("SpeechRecognition library is required for speech-to-text conversion")
        raise RuntimeError(
            "SpeechRecognition library is required for speech-to-text conversion. Please install it."
        ) from exc

    try:
        recognizer = sr.Recognizer()

        with sr.AudioFile(str(filepath)) as source:
            # Calibration reads (and discards) the beginning of the stream and
            # only tunes the threshold used for live listening, so for a file
            # it is opt-in: by default the whole recording is transcribed.
            if options.get('adjust_for_ambient_noise', False):
                recognizer.adjust_for_ambient_noise(source)

            audio_data = recognizer.record(source)

        language = options.get('language', 'en-US')
        text = recognizer.recognize_google(audio_data, language=language)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(text)

    except Exception as exc:
        # Some recognizer errors carry no message at all; fall back to the
        # exception type so the failure reason is never blank.
        detail = str(exc) or type(exc).__name__
        logger.error("Error in speech-to-text conversion: %s", detail)
        raise RuntimeError(f"Failed to convert speech to text: {detail}") from exc

    return str(output_path)
122
+
123
+ def _convert_with_pydub(
124
+ filepath: Path,
125
+ source_format: str,
126
+ target_format: str,
127
+ output_path: Path,
128
+ options: Dict[str, Any]
129
+ ) -> str:
130
+ """Convert audio using pydub library."""
131
+ from pydub import AudioSegment
132
+
133
+ # Load the audio file
134
+ audio = AudioSegment.from_file(str(filepath), format=source_format)
135
+
136
+ # Apply audio processing options
137
+ if 'volume' in options:
138
+ # Adjust volume (in dB)
139
+ audio = audio + options['volume']
140
+
141
+ if 'speed' in options:
142
+ # Change speed (requires ffmpeg with rubberband)
143
+ speed = options['speed']
144
+ audio = audio._spawn(audio.raw_data, overrides={
145
+ "frame_rate": int(audio.frame_rate * speed)
146
+ })
147
+
148
+ if 'sample_rate' in options:
149
+ # Change sample rate
150
+ audio = audio.set_frame_rate(options['sample_rate'])
151
+
152
+ if 'channels' in options:
153
+ # Change number of channels
154
+ if options['channels'] == 1:
155
+ audio = audio.set_channels(1)
156
+ elif options['channels'] == 2:
157
+ audio = audio.set_channels(2)
158
+
159
+ if 'trim' in options:
160
+ # Trim audio (start_ms, end_ms)
161
+ start_ms, end_ms = options['trim']
162
+ audio = audio[start_ms:end_ms]
163
+
164
+ # Set export parameters
165
+ export_params = {}
166
+
167
+ if 'bitrate' in options:
168
+ export_params['bitrate'] = options['bitrate']
169
+
170
+ # Export to target format
171
+ audio.export(str(output_path), format=target_format, **export_params)
172
+
173
+ return str(output_path)
174
+
175
def _convert_with_ffmpeg(
    filepath: Path,
    source_format: str,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """
    Convert audio using ffmpeg directly.

    Args:
        filepath: Path to the source audio file
        source_format: Source audio format (not passed to ffmpeg)
        target_format: Target audio format (not passed to ffmpeg; the
            command only receives the output path)
        output_path: Path to save the converted audio; an existing file is
            overwritten
        options: Processing options. Recognised keys:
            volume: Gain to apply, in dB
            speed: Tempo multiplier for the atempo filter
            sample_rate: Output sample rate
            channels: Number of output channels
            bitrate: Audio bitrate
            trim: (start_seconds, duration_seconds)

    Returns:
        Path to the converted audio

    Raises:
        RuntimeError: If ffmpeg cannot be started or exits with an error
    """
    import subprocess

    # -y overwrites an existing output file instead of prompting; the prompt
    # would be invisible here because the process output is captured.
    cmd = ['ffmpeg', '-y', '-i', str(filepath)]

    # ffmpeg honours only one -filter:a per output, so all audio filters are
    # joined into a single comma-separated chain.
    audio_filters = []
    if 'volume' in options:
        audio_filters.append(f'volume={options["volume"]}dB')
    if 'speed' in options:
        audio_filters.append(f'atempo={options["speed"]}')
    if audio_filters:
        cmd.extend(['-filter:a', ','.join(audio_filters)])

    if 'sample_rate' in options:
        cmd.extend(['-ar', str(options['sample_rate'])])

    if 'channels' in options:
        cmd.extend(['-ac', str(options['channels'])])

    if 'bitrate' in options:
        # str() so a numeric bitrate does not break the argument list.
        cmd.extend(['-b:a', str(options['bitrate'])])

    if 'trim' in options:
        # NOTE(review): this backend reads trim as (start_seconds,
        # duration_seconds) while the pydub backend slices with
        # (start_ms, end_ms) - confirm which units callers should pass.
        start_sec, duration_sec = options['trim']
        cmd.extend(['-ss', str(start_sec), '-t', str(duration_sec)])

    cmd.append(str(output_path))

    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except (OSError, subprocess.SubprocessError) as e:
        # OSError covers a missing or non-executable ffmpeg binary, which is
        # not a SubprocessError.
        detail = str(e)
        stderr = getattr(e, 'stderr', None)
        if stderr:
            detail = f"{detail}: {stderr.decode('utf-8', errors='replace').strip()}"
        logger.error("Error running ffmpeg: %s", detail)
        raise RuntimeError(f"Failed to convert audio with ffmpeg: {detail}") from e

    return str(output_path)
@@ -0,0 +1,412 @@
1
+ """
2
+ Code and markup converter module for handling code and markup format conversions.
3
+ """
4
+
5
+ import logging
6
+ import os
7
+ import tempfile
8
+ from pathlib import Path
9
+ from typing import Union, Optional, Dict, Any
10
+
11
logger = logging.getLogger(__name__)

# Maps each source format to the target formats it can be converted to.
SUPPORTED_CONVERSIONS = dict(
    json=['xml', 'yaml', 'csv', 'txt'],
    yaml=['json', 'xml', 'txt'],
    xml=['json', 'yaml', 'txt'],
    html=['md', 'txt', 'pdf'],
    md=['html', 'txt', 'pdf'],
    latex=['pdf', 'docx', 'html'],
)
22
+
23
def convert(
    filepath: Union[str, Path],
    source_format: str,
    target_format: str,
    output_path: Union[str, Path],
    options: Optional[Dict[str, Any]] = None
) -> str:
    """
    Convert code or markup from one format to another.

    Args:
        filepath: Path to the source file
        source_format: Source format
        target_format: Target format
        output_path: Path to save the converted file
        options: Additional conversion options

    Returns:
        Path to the converted file

    Raises:
        ValueError: If the conversion is not supported
        RuntimeError: If the conversion fails
    """
    if options is None:
        options = {}

    if target_format not in SUPPORTED_CONVERSIONS.get(source_format, []):
        raise ValueError(f"Conversion from {source_format} to {target_format} is not supported")

    # Built at call time so the helpers may be defined further down the module.
    handlers = {
        'json': _convert_from_json,
        'yaml': _convert_from_yaml,
        'xml': _convert_from_xml,
        'html': _convert_from_html,
        'md': _convert_from_md,
        'latex': _convert_from_latex,
    }
    handler = handlers.get(source_format)
    if handler is None:
        # Only reachable if the conversion table lists a format that has no
        # handler; raised outside the try so it stays a ValueError.
        raise ValueError(f"Unsupported source format: {source_format}")

    filepath = Path(filepath)
    output_path = Path(output_path)

    try:
        return handler(filepath, target_format, output_path, options)
    except Exception as e:
        logger.error("Error converting %s to %s: %s", filepath, target_format, e)
        # Chain the original exception so its traceback is kept.
        raise RuntimeError(f"Failed to convert {filepath} to {target_format}: {str(e)}") from e
77
+
78
+ def _convert_from_json(
79
+ filepath: Path,
80
+ target_format: str,
81
+ output_path: Path,
82
+ options: Dict[str, Any]
83
+ ) -> str:
84
+ """Convert from JSON to other formats."""
85
+ import json
86
+
87
+ # Read the JSON file
88
+ with open(filepath, 'r', encoding='utf-8') as f:
89
+ data = json.load(f)
90
+
91
+ if target_format == 'xml':
92
+ try:
93
+ import dicttoxml
94
+
95
+ # Convert to XML
96
+ xml = dicttoxml.dicttoxml(data, custom_root=options.get('root_name', 'root'), attr_type=False)
97
+
98
+ # Write to file
99
+ with open(output_path, 'wb') as f:
100
+ f.write(xml)
101
+
102
+ except ImportError:
103
+ raise RuntimeError("dicttoxml library is required for JSON to XML conversion. Please install it.")
104
+
105
+ elif target_format == 'yaml':
106
+ try:
107
+ import yaml
108
+
109
+ # Convert to YAML
110
+ with open(output_path, 'w', encoding='utf-8') as f:
111
+ yaml.dump(data, f, default_flow_style=False, sort_keys=False)
112
+
113
+ except ImportError:
114
+ raise RuntimeError("PyYAML library is required for JSON to YAML conversion. Please install it.")
115
+
116
+ elif target_format == 'csv':
117
+ try:
118
+ import csv
119
+
120
+ # Check if the JSON is a list of dictionaries (suitable for CSV)
121
+ if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
122
+ raise ValueError("JSON must be a list of dictionaries to convert to CSV")
123
+
124
+ # Get all unique keys as CSV headers
125
+ headers = set()
126
+ for item in data:
127
+ headers.update(item.keys())
128
+ headers = sorted(headers)
129
+
130
+ # Write to CSV
131
+ with open(output_path, 'w', encoding='utf-8', newline='') as f:
132
+ writer = csv.DictWriter(f, fieldnames=headers)
133
+ writer.writeheader()
134
+ writer.writerows(data)
135
+
136
+ except ValueError as e:
137
+ raise ValueError(f"Error converting JSON to CSV: {str(e)}")
138
+
139
+ elif target_format == 'txt':
140
+ # Simple pretty-printed JSON to text
141
+ with open(output_path, 'w', encoding='utf-8') as f:
142
+ json.dump(data, f, indent=4)
143
+
144
+ return str(output_path)
145
+
146
+ def _convert_from_yaml(
147
+ filepath: Path,
148
+ target_format: str,
149
+ output_path: Path,
150
+ options: Dict[str, Any]
151
+ ) -> str:
152
+ """Convert from YAML to other formats."""
153
+ try:
154
+ import yaml
155
+
156
+ # Read the YAML file
157
+ with open(filepath, 'r', encoding='utf-8') as f:
158
+ data = yaml.safe_load(f)
159
+
160
+ if target_format == 'json':
161
+ import json
162
+
163
+ # Convert to JSON
164
+ with open(output_path, 'w', encoding='utf-8') as f:
165
+ json.dump(data, f, indent=4)
166
+
167
+ elif target_format == 'xml':
168
+ import dicttoxml
169
+
170
+ # Convert to XML
171
+ xml = dicttoxml.dicttoxml(data, custom_root=options.get('root_name', 'root'), attr_type=False)
172
+
173
+ # Write to file
174
+ with open(output_path, 'wb') as f:
175
+ f.write(xml)
176
+
177
+ elif target_format == 'txt':
178
+ # Simple YAML to text (just copy the content)
179
+ with open(filepath, 'r', encoding='utf-8') as f_in:
180
+ with open(output_path, 'w', encoding='utf-8') as f_out:
181
+ f_out.write(f_in.read())
182
+
183
+ except ImportError:
184
+ raise RuntimeError("PyYAML library is required for YAML conversions. Please install it.")
185
+
186
+ return str(output_path)
187
+
188
+ def _convert_from_xml(
189
+ filepath: Path,
190
+ target_format: str,
191
+ output_path: Path,
192
+ options: Dict[str, Any]
193
+ ) -> str:
194
+ """Convert from XML to other formats."""
195
+ try:
196
+ import xmltodict
197
+
198
+ # Read the XML file
199
+ with open(filepath, 'r', encoding='utf-8') as f:
200
+ xml_content = f.read()
201
+
202
+ # Convert XML to dict
203
+ data = xmltodict.parse(xml_content)
204
+
205
+ if target_format == 'json':
206
+ import json
207
+
208
+ # Convert to JSON
209
+ with open(output_path, 'w', encoding='utf-8') as f:
210
+ json.dump(data, f, indent=4)
211
+
212
+ elif target_format == 'yaml':
213
+ import yaml
214
+
215
+ # Convert to YAML
216
+ with open(output_path, 'w', encoding='utf-8') as f:
217
+ yaml.dump(data, f, default_flow_style=False, sort_keys=False)
218
+
219
+ elif target_format == 'txt':
220
+ # Simple XML to text (just copy the content)
221
+ with open(filepath, 'r', encoding='utf-8') as f_in:
222
+ with open(output_path, 'w', encoding='utf-8') as f_out:
223
+ f_out.write(f_in.read())
224
+
225
+ except ImportError:
226
+ raise RuntimeError("xmltodict library is required for XML conversions. Please install it.")
227
+
228
+ return str(output_path)
229
+
230
+ def _convert_from_html(
231
+ filepath: Path,
232
+ target_format: str,
233
+ output_path: Path,
234
+ options: Dict[str, Any]
235
+ ) -> str:
236
+ """Convert from HTML to other formats."""
237
+ if target_format == 'md':
238
+ try:
239
+ import html2text
240
+
241
+ # Read the HTML file
242
+ with open(filepath, 'r', encoding='utf-8') as f:
243
+ html_content = f.read()
244
+
245
+ # Convert to Markdown
246
+ h = html2text.HTML2Text()
247
+ h.ignore_links = options.get('ignore_links', False)
248
+ h.ignore_images = options.get('ignore_images', False)
249
+ h.body_width = options.get('body_width', 0) # 0 means no wrapping
250
+
251
+ markdown = h.handle(html_content)
252
+
253
+ # Write to file
254
+ with open(output_path, 'w', encoding='utf-8') as f:
255
+ f.write(markdown)
256
+
257
+ except ImportError:
258
+ # Alternative method using pandoc
259
+ _convert_using_pandoc(filepath, target_format, output_path)
260
+
261
+ elif target_format == 'txt':
262
+ try:
263
+ from bs4 import BeautifulSoup
264
+
265
+ # Read the HTML file
266
+ with open(filepath, 'r', encoding='utf-8') as f:
267
+ html_content = f.read()
268
+
269
+ # Parse HTML and extract text
270
+ soup = BeautifulSoup(html_content, 'html.parser')
271
+ text = soup.get_text(separator='\n\n')
272
+
273
+ # Write to file
274
+ with open(output_path, 'w', encoding='utf-8') as f:
275
+ f.write(text)
276
+
277
+ except ImportError:
278
+ # Alternative method using pandoc
279
+ _convert_using_pandoc(filepath, target_format, output_path)
280
+
281
+ elif target_format == 'pdf':
282
+ # Use pandoc for HTML to PDF conversion
283
+ _convert_using_pandoc(filepath, target_format, output_path)
284
+
285
+ return str(output_path)
286
+
287
+ def _convert_from_md(
288
+ filepath: Path,
289
+ target_format: str,
290
+ output_path: Path,
291
+ options: Dict[str, Any]
292
+ ) -> str:
293
+ """Convert from Markdown to other formats."""
294
+ if target_format == 'html':
295
+ try:
296
+ import markdown
297
+
298
+ # Read the Markdown file
299
+ with open(filepath, 'r', encoding='utf-8') as f:
300
+ md_content = f.read()
301
+
302
+ # Convert to HTML
303
+ html = markdown.markdown(md_content, extensions=['tables', 'fenced_code'])
304
+
305
+ # Add HTML boilerplate
306
+ html = f"""<!DOCTYPE html>
307
+ <html>
308
+ <head>
309
+ <meta charset="utf-8">
310
+ <title>{filepath.stem}</title>
311
+ <style>
312
+ body {{ font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; max-width: 800px; margin: 0 auto; }}
313
+ pre {{ background-color: #f5f5f5; padding: 10px; border-radius: 5px; overflow-x: auto; }}
314
+ code {{ font-family: monospace; }}
315
+ img {{ max-width: 100%; }}
316
+ table {{ border-collapse: collapse; width: 100%; }}
317
+ th, td {{ border: 1px solid #ddd; padding: 8px; }}
318
+ th {{ background-color: #f2f2f2; }}
319
+ </style>
320
+ </head>
321
+ <body>
322
+ {html}
323
+ </body>
324
+ </html>"""
325
+
326
+ # Write to file
327
+ with open(output_path, 'w', encoding='utf-8') as f:
328
+ f.write(html)
329
+
330
+ except ImportError:
331
+ # Alternative method using pandoc
332
+ _convert_using_pandoc(filepath, target_format, output_path)
333
+
334
+ elif target_format == 'txt':
335
+ # Simple conversion - just strip markdown syntax
336
+ with open(filepath, 'r', encoding='utf-8') as f:
337
+ md_content = f.read()
338
+
339
+ # Very basic markdown stripping
340
+ import re
341
+
342
+ # Remove headers
343
+ text = re.sub(r'^#+\s+', '', md_content, flags=re.MULTILINE)
344
+
345
+ # Remove bold/italic
346
+ text = re.sub(r'\*\*(.*?)\*\*', r'\1', text)
347
+ text = re.sub(r'\*(.*?)\*', r'\1', text)
348
+
349
+ # Remove links
350
+ text = re.sub(r'\[(.*?)\]\(.*?\)', r'\1', text)
351
+
352
+ # Remove code blocks
353
+ text = re.sub(r'```.*?```', '', text, flags=re.DOTALL)
354
+
355
+ with open(output_path, 'w', encoding='utf-8') as f:
356
+ f.write(text)
357
+
358
+ elif target_format == 'pdf':
359
+ # Use pandoc for Markdown to PDF conversion
360
+ _convert_using_pandoc(filepath, target_format, output_path)
361
+
362
+ return str(output_path)
363
+
364
def _convert_from_latex(
    filepath: Path,
    target_format: str,
    output_path: Path,
    options: Dict[str, Any]
) -> str:
    """Convert a LaTeX file by delegating to pandoc, whatever the target format."""
    destination = str(output_path)
    # Every LaTeX target is handled by pandoc; options are not consulted.
    _convert_using_pandoc(filepath, target_format, output_path)
    return destination
374
+
375
+ def _convert_using_pandoc(
376
+ filepath: Path,
377
+ target_format: str,
378
+ output_path: Path
379
+ ) -> None:
380
+ """Use pandoc for document conversion."""
381
+ try:
382
+ import subprocess
383
+
384
+ # Map our format names to pandoc format names
385
+ format_map = {
386
+ 'md': 'markdown',
387
+ 'html': 'html',
388
+ 'pdf': 'pdf',
389
+ 'docx': 'docx',
390
+ 'latex': 'latex',
391
+ 'txt': 'plain'
392
+ }
393
+
394
+ source_ext = filepath.suffix.lower().lstrip('.')
395
+ if source_ext == 'tex':
396
+ source_ext = 'latex'
397
+
398
+ pandoc_source = format_map.get(source_ext, source_ext)
399
+ pandoc_target = format_map.get(target_format, target_format)
400
+
401
+ cmd = [
402
+ 'pandoc',
403
+ '-f', pandoc_source,
404
+ '-t', pandoc_target,
405
+ '-o', str(output_path),
406
+ str(filepath)
407
+ ]
408
+
409
+ subprocess.run(cmd, check=True, capture_output=True)
410
+
411
+ except (ImportError, subprocess.SubprocessError) as e:
412
+ raise RuntimeError(f"Failed to convert using pandoc: {str(e)}. Please install pandoc.")