modusa 0.4.20__py3-none-any.whl → 0.4.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
modusa/.DS_Store ADDED
Binary file
modusa/__init__.py CHANGED
@@ -8,4 +8,4 @@ from modusa.tools import play, convert, record, save
8
8
  from modusa.tools import download
9
9
  from modusa.tools import load, load_ann
10
10
 
11
- __version__ = "0.4.20"
11
+ __version__ = "0.4.23"
@@ -6,7 +6,7 @@
6
6
  # Email: ankit0.anand0@gmail.com
7
7
  #---------------------------------
8
8
 
9
- def load_ann(path, clip=None):
9
+ def load_ann(path, trim=None):
10
10
  """
11
11
  Load annotation from Audacity label
12
12
  text file and also ctm file.
@@ -15,9 +15,9 @@ def load_ann(path, clip=None):
15
15
  ----------
16
16
  path: str
17
17
  - label text/ctm file path.
18
- clip: tuple[number, number] | number | None
19
- - Incase you clipped the audio signal, this parameter will help clip the annotation.
20
- - If you clip the audio, say from (10, 20), set the clip to (10, 20).
18
+ trim: tuple[number, number] | number | None
19
+ - In case you trimmed the audio signal, this parameter clips the annotation so that its timings stay aligned with the trimmed audio.
20
+ - If you trimmed the audio, say from (10, 20), set the trim to (10, 20).
21
21
  - Default: None
22
22
 
23
23
  Returns
@@ -42,14 +42,14 @@ def load_ann(path, clip=None):
42
42
  ann = [] # This will store the annotation
43
43
 
44
44
  # Clipping the annotation to match with the clipped audio
45
- if clip is not None:
45
+ if trim is not None:
46
46
  # Map clip input to the right format
47
- if isinstance(clip, int or float):
48
- clip = (0, clip)
49
- elif isinstance(clip, tuple) and len(clip) > 1:
50
- clip = (clip[0], clip[1])
47
+ if isinstance(trim, int or float):
48
+ trim = (0, trim)
49
+ elif isinstance(trim, tuple) and len(trim) > 1:
50
+ trim = (trim[0], trim[1])
51
51
  else:
52
- raise ValueError(f"Invalid clip type or length: {type(clip)}, len={len(clip)}")
52
+ raise ValueError(f"Invalid clip type or length: {type(trim)}, len={len(trim)}")
53
53
 
54
54
  if path.suffix == ".txt":
55
55
  with open(str(path), "r") as f:
@@ -60,11 +60,11 @@ def load_ann(path, clip=None):
60
60
 
61
61
  # In case the user has clipped the audio signal, we adjust the annotation
62
62
  # to match the clipped audio
63
- if clip is not None:
64
- offset = clip[0]
63
+ if trim is not None:
64
+ offset = trim[0]
65
65
  # Clamp annotation to clip boundaries
66
- new_start = max(start, clip[0]) - offset
67
- new_end = min(end, clip[1]) - offset
66
+ new_start = max(start, trim[0]) - offset
67
+ new_end = min(end, trim[1]) - offset
68
68
 
69
69
  # only keep if there's still overlap
70
70
  if new_start < new_end:
@@ -89,11 +89,11 @@ def load_ann(path, clip=None):
89
89
 
90
90
  # In case the user has clipped the audio signal, we adjust the annotation
91
91
  # to match the clipped audio
92
- if clip is not None:
93
- offset = clip[0]
92
+ if trim is not None:
93
+ offset = trim[0]
94
94
  # Clamp annotation to clip boundaries
95
- new_start = max(start, clip[0]) - offset
96
- new_end = min(end, clip[1]) - offset
95
+ new_start = max(start, trim[0]) - offset
96
+ new_end = min(end, trim[1]) - offset
97
97
 
98
98
  # only keep if there's still overlap
99
99
  if new_start < new_end:
@@ -1,106 +1,155 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
3
 
4
- import soundfile as sf
5
- from scipy.signal import resample
4
+ import subprocess
5
+ import numpy as np
6
+ import imageio_ffmpeg as ffmpeg
6
7
  from pathlib import Path
7
- import tempfile
8
- from scipy.signal import resample
9
- from .youtube_downloader import download
10
- from .audio_converter import convert
8
+ import re
11
9
 
10
+ def _get_audio_info_ffmpeg(path: Path):
11
+ """
12
+ To get the original sampling rate and number of
13
+ channels of a given audio file by parsing the
14
+ metadata. (No extra tool required).
12
15
 
13
- def load(path, sr=None, clip=None):
16
+ Parameters
17
+ ----------
18
+ path: PathLike
19
+ - Audio filepath
20
+
21
+ Returns
22
+ -------
23
+ int
24
+ - Original sampling rate (hz)
25
+ int
26
+ - Number of channels
14
27
  """
15
- Loads audio file from various sources.
28
+ ffmpeg_exe = ffmpeg.get_ffmpeg_exe()
29
+ cmd = [ffmpeg_exe, "-i", str(path)]
30
+ proc = subprocess.run(cmd, stderr=subprocess.PIPE, text=True)
31
+ text = proc.stderr
32
+
33
+ # Example parse: "Stream #0:0: Audio: mp3, 44100 Hz, stereo, ..."
34
+ m = re.search(r'Audio:.*?(\d+)\s*Hz.*?(mono|stereo)', text)
35
+ if not m:
36
+ raise RuntimeError("Could not parse audio info")
37
+ sr = int(m.group(1))
38
+ channels = 1 if m.group(2) == "mono" else 2
39
+ return sr, channels
16
40
 
17
- .. code-block:: python
18
-
19
- import modusa as ms
20
- audio_fp = ms.load(
21
- "https://www.youtube.com/watch?v=lIpw9-Y_N0g",
22
- sr = None, clip=(5, 10))
41
+ def _load_audio_from_youtube(url: str):
42
+ """
43
+ Download audio from a YouTube URL, convert it to WAV, and return the path.
44
+
45
+ Parameters
46
+ ----------
47
+ url : str
48
+ YouTube video URL.
49
+
50
+ Returns
51
+ -------
52
+ Path
53
+ Path to the converted WAV file (you can delete it later).
54
+ """
55
+ from modusa.tools.youtube_downloader import download
56
+ from modusa.tools.audio_converter import convert
57
+ import tempfile
58
+
59
+ # Temporary directory to hold files (auto-created, not auto-deleted)
60
+ tmpdir = Path(tempfile.mkdtemp())
61
+
62
+ # Download YouTube audio (e.g. .m4a or .webm)
63
+ audio_fp: Path = download(url=url, content_type="audio", output_dir=tmpdir)
64
+
65
+ # Convert downloaded file to .wav
66
+ wav_audio_fp: Path = convert(inp_audio_fp=audio_fp, output_audio_fp=audio_fp.with_suffix(".wav"))
67
+
68
+ # Return path to the WAV file
69
+ return wav_audio_fp
70
+
71
+ #---------------------
72
+ # Main Function
73
+ #---------------------
74
+ def load(path, sr=None, trim=None, ch=None):
75
+ """
76
+ Lightweight audio loader using imageio-ffmpeg.
23
77
 
24
78
  Parameters
25
79
  ----------
26
- path: str
27
- - Path to the audio
28
- - Youtube URL
29
- sr: int | None
80
+ path: PathLike/str/URL
81
+ - Path to the audio file / YouTube video
82
+ sr: int
30
83
  - Sampling rate to load the audio in.
31
- clip: number | tuple[number, number] | None
32
- - Which segment of the audio you want.
33
- - Eg., 10 => First 10 sec, (5, 10) => 5 to 10 second
34
- - Default: None => Entire audio.
84
+ - Default: None => Use the original sampling rate
85
+ trim: tuple[number, number]
86
+ - (start, end) in seconds to trim the audio clip.
87
+ - Default: None => No trimming
88
+ ch: int
89
+ - 1 for mono and 2 for stereo
90
+ - Default: None => Use the original number of channels.
35
91
 
36
- Return
37
- ------
92
+ Returns
93
+ -------
38
94
  np.ndarray
39
- - Audio signal.
40
- int
41
- - Sampling rate of the loaded audio signal.
42
- title
43
- - Title of the loaded audio.
44
- - Filename without extension or YouTube title.
95
+ - Audio signal Float32 waveform in [-1, 1].
96
+ int:
97
+ Sampling rate.
98
+ str:
99
+ File name stem.
45
100
  """
101
+ path = Path(path)
102
+ ffmpeg_exe = ffmpeg.get_ffmpeg_exe()
46
103
 
47
- # Check if the path is YouTube
48
- if ".youtube." in str(path):
49
- # Download the audio in temp directory using tempfile module
50
- with tempfile.TemporaryDirectory() as tmpdir:
51
- # Download
52
- audio_fp: Path = download(url=path, content_type="audio", output_dir=Path(tmpdir))
53
-
54
- # Convert the audio to ".wav" form for loading
55
- wav_audio_fp: Path = convert(inp_audio_fp=audio_fp, output_audio_fp=audio_fp.with_suffix(".wav"))
56
-
57
- # Load the audio in memory
58
- audio_data, audio_sr = sf.read(wav_audio_fp)
59
- title = audio_fp.stem
60
-
61
- # Convert to mono if it's multi-channel
62
- if audio_data.ndim > 1:
63
- audio_data = audio_data.mean(axis=1)
64
-
65
- # Resample if needed
66
- if sr is not None:
67
- if audio_sr != sr:
68
- n_samples = int(len(audio_data) * sr / audio_sr)
69
- audio_data = resample(audio_data, n_samples)
70
- audio_sr = sr
71
-
72
- else:
73
- # Check if the file exists
74
- fp = Path(path)
104
+ yt = False # Is the path a youtube URL
105
+
106
+ if ".youtube" in str(path):
107
+ yt = True
108
+ try:
109
+ path: Path = _load_audio_from_youtube(url=str(path))
110
+ except Exception as e:
111
+ raise ConnectionRefusedError("unable to download from YouTube")
112
+
113
+ # Find the real sample rate from the file
114
+ if sr is None:
115
+ sr, _ = _get_audio_info_ffmpeg(path)
116
+ if not (sr > 100 and sr < 80000):
117
+ raise Exception(f"Error reading the metadata for original sampling rate {sr}, please set `sr` explicitly")
118
+
119
+ # Find the real number of channels from the file
120
+ if ch is None:
121
+ _, ch = _get_audio_info_ffmpeg(path)
75
122
 
76
- if not fp.exists():
77
- raise FileNotFoundError(f"{path} does not exist.")
123
+ if ch not in [1, 2]:
124
+ raise Exception(f"Error reading the metadata for number of channels {ch}, please set `ch` explicitly")
78
125
 
79
- # Load the audio in memory
80
- audio_data, audio_sr = sf.read(fp)
81
- title = fp.stem
126
+ cmd = [ffmpeg_exe]
127
+
128
+ # Optional trimming
129
+ if trim is not None:
130
+ start, end = trim
131
+ duration = end - start
132
+ cmd += ["-ss", str(start), "-t", str(duration)]
82
133
 
83
- # Convert to mono if it's multi-channel
84
- if audio_data.ndim > 1:
85
- audio_data = audio_data.mean(axis=1)
86
-
87
- # Resample if needed
88
- if sr is not None:
89
- if audio_sr != sr:
90
- n_samples = int(len(audio_data) * sr / audio_sr)
91
- audio_data = resample(audio_data, n_samples)
92
- audio_sr = sr
93
-
94
- # Clip the audio signal as per needed
95
- if clip is not None:
96
- # Map clip input to the right format
97
- if isinstance(clip, int or float):
98
- clip = (0, clip)
99
- elif isinstance(clip, tuple) and len(clip) > 1:
100
- clip = (clip[0], clip[1])
101
- else:
102
- raise ValueError(f"Invalid clip type or length: {type(clip)}, len={len(clip)}")
134
+ cmd += ["-i", str(path), "-f", "s16le", "-acodec", "pcm_s16le"]
135
+ cmd += ["-ar", str(sr)]
136
+ cmd += ["-ac", str(ch)]
103
137
 
104
- audio_data = audio_data[int(clip[0]*audio_sr):int(clip[1]*audio_sr)]
138
+ cmd += ["-"]
139
+
140
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
141
+ raw = proc.stdout.read()
142
+ proc.wait()
105
143
 
106
- return audio_data, audio_sr, title
144
+ audio = np.frombuffer(raw, np.int16).astype(np.float32) / 32768.0
145
+
146
+ # Reshape two-channel output to (2, n_samples)
147
+ if ch == 2:
148
+ audio = audio.reshape(-1, 2).T
149
+
150
+ # Delete the file if downloaded from youtube
151
+ if yt:
152
+ path.unlink(missing_ok=True)
153
+ path.parent.rmdir()
154
+
155
+ return audio, sr, path.stem
@@ -5,17 +5,27 @@ from pathlib import Path
5
5
  import numpy as np
6
6
  import base64
7
7
 
8
- def play(
9
- y: np.ndarray,
10
- sr: float,
11
- clip: tuple[float, float] | None = None,
12
- label: str | None = None,
13
- ) -> None:
8
+ def play(y, sr: float, clip=None, label=None):
14
9
  """
15
- Audio player with optional clip selection, transcription-style label,
16
- and an embedded bottom-right logo (../images/icon.png).
10
+ Audio player with optional clip selection, transcription-style label.
11
+
12
+ Parameters
13
+ ----------
14
+ y: ndarray
15
+ - Audio signal.
16
+ sr: float
17
+ - Sampling rate.
18
+ clip: tuple[float, float] | None
19
+ - The portion from the audio signal to be played.
20
+ label: str | None
21
+ - Could be transcription/labels attached to the audio.
22
+
23
+ Returns
24
+ -------
25
+ None
17
26
  """
18
- start_time, end_time = 0.0, len(y) / sr
27
+ start_time = 0.0
28
+ end_time = len(y) / sr if y.ndim < 2 else y[0].size / sr
19
29
 
20
30
  # Optional clip selection
21
31
  if clip is not None:
@@ -84,7 +94,7 @@ def play(
84
94
  box-shadow:0 1px 3px rgba(0,0,0,0.05);
85
95
  ">
86
96
  {label_html}
87
- <div style="margin-top:10px;">
97
+ <div style="margin-top:10px; margin-bottom:10px">
88
98
  {audio_html}
89
99
  </div>
90
100
  {logo_html}
modusa/tools/plotter.py CHANGED
@@ -209,6 +209,10 @@ class Fig:
209
209
 
210
210
  axs[-1, 0].tick_params(bottom=True, labelbottom=True)
211
211
 
212
+ # Add the figure title on top-left (if any)
213
+ if self._fig_num is not None:
214
+ fig.suptitle(f'fig - {self._fig_num}', fontsize=12, fontweight='bold', x=0.01, ha='left', va='top', y=0.98)
215
+
212
216
  # xlim should be applied on reference subplot, rest all subplots will automatically adjust
213
217
  if xlim is not None:
214
218
  axs[0, 0].set_xlim(xlim)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: modusa
3
- Version: 0.4.20
3
+ Version: 0.4.23
4
4
  Summary: A modular signal analysis python library.
5
5
  Author-Email: Ankit Anand <ankit0.anand0@gmail.com>
6
6
  License: MIT
@@ -9,10 +9,8 @@ Requires-Dist: numpy>=2.2.6
9
9
  Requires-Dist: matplotlib>=3.10.3
10
10
  Requires-Dist: yt-dlp==2025.9.23
11
11
  Requires-Dist: IPython>=9.5.0
12
- Requires-Dist: sounddevice>=0.5.2
13
12
  Requires-Dist: ipywidgets>=8.1.7
14
- Requires-Dist: soundfile>=0.13.1
15
- Requires-Dist: scipy>=1.16.2
13
+ Requires-Dist: imageio-ffmpeg>=0.6.0
16
14
  Description-Content-Type: text/markdown
17
15
 
18
16
  # modusa
@@ -1,8 +1,9 @@
1
- modusa-0.4.20.dist-info/METADATA,sha256=pJ932mbsVsJ8XTbIHPX3h1RZa5k-GUBk0i5zgsqGMKY,1467
2
- modusa-0.4.20.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
3
- modusa-0.4.20.dist-info/entry_points.txt,sha256=fmKpleVXj6CdaBVL14WoEy6xx7JQCs85jvzwTi3lePM,73
4
- modusa-0.4.20.dist-info/licenses/LICENSE.md,sha256=JTaXAjx5awk76VArKCx5dUW8vmLEWsL_ZlR7-umaHbA,1078
5
- modusa/__init__.py,sha256=x95HzUy8e7c6XeEIZsuy6QmE82fcfJsnWvTUm8u6WyE,324
1
+ modusa-0.4.23.dist-info/METADATA,sha256=VcG0sWHvNgl2n0jMCiKiWM2BAeGHUJorSp0pFogxx90,1408
2
+ modusa-0.4.23.dist-info/WHEEL,sha256=9P2ygRxDrTJz3gsagc0Z96ukrxjr-LFBGOgv3AuKlCA,90
3
+ modusa-0.4.23.dist-info/entry_points.txt,sha256=fmKpleVXj6CdaBVL14WoEy6xx7JQCs85jvzwTi3lePM,73
4
+ modusa-0.4.23.dist-info/licenses/LICENSE.md,sha256=JTaXAjx5awk76VArKCx5dUW8vmLEWsL_ZlR7-umaHbA,1078
5
+ modusa/.DS_Store,sha256=hHecruWmea5u7ITMSdq62sVVdvT2I5lTN20gTfD3Btk,8196
6
+ modusa/__init__.py,sha256=90isCiArafICPkaTgChbljh2F5o_iDKhqMz1gArlDOE,324
6
7
  modusa/config.py,sha256=bTqK4t00FZqERVITrxW_q284aDDJAa9aMSfFknfR-oU,280
7
8
  modusa/decorators.py,sha256=8zeNX_wE37O6Vp0ysR4-WCZaEL8mq8dyCF_I5DHOzks,5905
8
9
  modusa/devtools/generate_docs_source.py,sha256=UDflHsk-Yh9-3YJTVBzKL32y8hcxiRgAlFEBTMiDqwM,3301
@@ -42,15 +43,15 @@ modusa/plugins/__init__.py,sha256=r1Bf5mnrVKRIwxboutY1iGzDy4EPQhqpk1kSW7iJj_Q,54
42
43
  modusa/plugins/base.py,sha256=Bh_1Bja7fOymFsCgwhXDbV6ys3D8muNrPwrfDrG_G_A,2382
43
44
  modusa/tools/__init__.py,sha256=S7P1uYckyUdkha2UX9uj4P7mqpF6cc5SHqiW6NEupgs,342
44
45
  modusa/tools/_plotter_old.py,sha256=KGow7mihA2H1WNq7s5bpivhCgGo2qVIeDaO6iabpsrg,19294
45
- modusa/tools/ann_loader.py,sha256=BEdwAh_lccx_SnAO3bNMeY3O5zGiJlH2o4snWmXj8eQ,3034
46
+ modusa/tools/ann_loader.py,sha256=m6Qu6jXnQ8LfUhKItoHSaHlGxUyzUJlGEyu4_50qJ8w,3099
46
47
  modusa/tools/audio_converter.py,sha256=415qBoPm2sBIuBSI7m1XBKm0AbmVmPydIPPr-uO8D3c,1778
47
- modusa/tools/audio_loader.py,sha256=n9Q9t_GmlE8AtioVwRcXX3rnd6PkbGTx-hAoNgUnNOQ,2780
48
- modusa/tools/audio_player.py,sha256=BI1ZmETxnmJACDHZMsbplzspwAx-_oKe3SpaHz-MFds,2295
48
+ modusa/tools/audio_loader.py,sha256=nQl-E8xM1wdoYzZ3yGw25FJY8EogbqIZzPSMFt4Fv1E,3899
49
+ modusa/tools/audio_player.py,sha256=kyBUnodkOE9Ox-hKHkfPeGAQ1RPTddbZYXO1ezz6-9w,2494
49
50
  modusa/tools/audio_recorder.py,sha256=K_LGqsPdjTdf3figEZTSQLmgMzYWgz18HTO8C1j5fE4,2788
50
51
  modusa/tools/audio_saver.py,sha256=ldzfr_AydsHTnKbxmBLJblN-hLzTmOlppOm306xI4Ug,510
51
52
  modusa/tools/base.py,sha256=C0ESJ0mIfjjRlAkRbSetNtMoOfS6IrHBjexRp3l_Mh4,1293
52
53
  modusa/tools/math_ops.py,sha256=ZZ7U4DgqT7cOeE7_Lzi_Qq-48WYfwR9_osbZwTmE9eg,8690
53
- modusa/tools/plotter.py,sha256=V0CcRnXkE4tkTZQe6v0LI0ucEagqeudOZ-xoQw2clTc,30667
54
+ modusa/tools/plotter.py,sha256=RrELZYgnyUVhGzcNfpTvyZvVTmZ6dvQmN9YSQoUjmaM,30860
54
55
  modusa/tools/youtube_downloader.py,sha256=hB_X8-7nOHXOlxg6vv3wyhBLoAsWyomrULP6_uCQL7s,1698
55
56
  modusa/utils/__init__.py,sha256=1oLL20yLB1GL9IbFiZD8OReDqiCpFr-yetIR6x1cNkI,23
56
57
  modusa/utils/config.py,sha256=cuGbqbovx5WDQq5rw3hIKcv3CnE5NttjacSOWnP1yu4,576
@@ -58,4 +59,4 @@ modusa/utils/excp.py,sha256=L9vhaGjKpv9viJYdmC9n5ndmk2GVbUBuFyZyhAQZmWY,906
58
59
  modusa/utils/logger.py,sha256=K0rsnObeNKCxlNeSnVnJeRhgfmob6riB2uyU7h3dDmA,571
59
60
  modusa/utils/np_func_cat.py,sha256=TyIFgRc6bARRMDnZxlVURO5Z0I-GWhxRONYyIv-Vwxs,1007
60
61
  modusa/utils/plot.py,sha256=s_vNdxvKfwxEngvJPgrF1PcmxZNnNaaXPViHWjyjJ-c,5335
61
- modusa-0.4.20.dist-info/RECORD,,
62
+ modusa-0.4.23.dist-info/RECORD,,