PyPI - modusa - Versions diffs - 0.4.21__tar.gz → 0.4.24__tar.gz - Mend

modusa 0.4.21tar.gz → 0.4.24tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

{modusa-0.4.21 → modusa-0.4.24}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: modusa
-Version: 0.4.21
+Version: 0.4.24
 Summary: A modular signal analysis python library.
 Author-Email: Ankit Anand <ankit0.anand0@gmail.com>
 License: MIT
@@ -11,8 +11,7 @@ Requires-Dist: yt-dlp==2025.9.23
 Requires-Dist: IPython>=9.5.0
 Requires-Dist: sounddevice>=0.5.2
 Requires-Dist: ipywidgets>=8.1.7
-Requires-Dist: soundfile>=0.13.1
-Requires-Dist: scipy>=1.16.2
+Requires-Dist: imageio-ffmpeg>=0.6.0
 Description-Content-Type: text/markdown
 # modusa

{modusa-0.4.21 → modusa-0.4.24}/pyproject.toml RENAMED Viewed

@@ -12,12 +12,11 @@ dependencies = [
     "IPython>=9.5.0",
     "sounddevice>=0.5.2",
     "ipywidgets>=8.1.7",
-    "soundfile>=0.13.1",
-    "scipy>=1.16.2",
+    "imageio-ffmpeg>=0.6.0",
 ]
 requires-python = ">=3.11"
 readme = "README.md"
-version = "0.4.21"
+version = "0.4.24"
 [project.license]
 text = "MIT"

modusa-0.4.24/src/modusa/.DS_Store ADDED Viewed

Binary file

{modusa-0.4.21 → modusa-0.4.24}/src/modusa/__init__.py RENAMED Viewed

@@ -8,4 +8,4 @@ from modusa.tools import play, convert, record, save
 from modusa.tools import download
 from modusa.tools import load, load_ann
-__version__ = "0.4.21"
+__version__ = "0.4.24"

modusa-0.4.24/src/modusa/tools/audio_loader.py ADDED Viewed

@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+import subprocess
+import numpy as np
+import imageio_ffmpeg as ffmpeg
+from pathlib import Path
+import re
+def _get_audio_info_ffmpeg(path: Path):
+	"""
+	Get the original sampling rate and number of channels
+	of an audio file by running the bundled ffmpeg on it
+	and parsing the stream description it prints to stderr.
+	(No extra tool required).
+	Parameters
+	----------
+	path: PathLike
+		- Audio filepath
+	Returns
+	-------
+	int
+		- Original sampling rate (hz)
+	int
+		- Number of channels
+	Raises
+	------
+	RuntimeError
+		- No audio stream line with a recognisable sampling rate
+		  and channel layout was found in the ffmpeg output.
+	"""
+	# Channel counts for the layout names ffmpeg prints instead of a number.
+	layout_channels = {
+		"mono": 1, "stereo": 2,
+		"2.1": 3, "3.0": 3,
+		"3.1": 4, "4.0": 4, "quad": 4,
+		"4.1": 5, "5.0": 5,
+		"5.1": 6, "6.0": 6, "hexagonal": 6,
+		"6.1": 7, "7.0": 7,
+		"7.1": 8, "octagonal": 8,
+	}
+	ffmpeg_exe = ffmpeg.get_ffmpeg_exe()
+	cmd = [ffmpeg_exe, "-i", str(path)]
+	# errors="replace": odd bytes in file metadata must not abort the probe.
+	proc = subprocess.run(cmd, stderr=subprocess.PIPE, text=True, errors="replace")
+	text = proc.stderr
+	# Example parse: "Stream #0:0: Audio: mp3, 44100 Hz, stereo, ..."
+	m = re.search(r'Audio:.*?(\d+)\s*Hz.*?(mono|stereo)', text)
+	if m:
+		return int(m.group(1)), (1 if m.group(2) == "mono" else 2)
+	# Fallback for other layouts, e.g. "48000 Hz, 5.1(side), fltp" or "44100 Hz, 3 channels, s16"
+	m = re.search(r'Audio:.*?(\d+)\s*Hz,\s*([^,\r\n]+)', text)
+	if not m:
+		raise RuntimeError("Could not parse audio info")
+	sr = int(m.group(1))
+	layout = m.group(2).strip()
+	counted = re.fullmatch(r'(\d+)\s+channels?', layout)
+	if counted:
+		channels = int(counted.group(1))
+	else:
+		# Drop a qualifier such as "(side)" before the table lookup.
+		channels = layout_channels.get(layout.split("(")[0].strip())
+	if channels is None:
+		raise RuntimeError("Could not parse audio info")
+	return sr, channels
+def _load_audio_from_youtube(url: str):
+	"""
+	Download audio from a YouTube URL, convert it to WAV, and return the path.
+	The WAV is written into a fresh temporary directory that is NOT removed
+	automatically on success; the caller owns it and must delete it.
+	Parameters
+	----------
+	url : str
+		YouTube video URL.
+	Returns
+	-------
+	Path
+		Path to the converted WAV file (you can delete it later).
+	Raises
+	------
+	Exception
+		Whatever the downloader or converter raises; the temporary
+		directory is removed before the error propagates.
+	"""
+	from modusa.tools.youtube_downloader import download
+	from modusa.tools.audio_converter import convert
+	import shutil
+	import tempfile
+	# Temporary directory to hold files (auto-created, not auto-deleted)
+	tmpdir = Path(tempfile.mkdtemp())
+	try:
+		# Download YouTube audio (e.g. .m4a or .webm)
+		audio_fp: Path = download(url=url, content_type="audio", output_dir=tmpdir)
+		# Convert downloaded file to .wav
+		wav_audio_fp: Path = convert(inp_audio_fp=audio_fp, output_audio_fp=audio_fp.with_suffix(".wav"))
+	except BaseException:
+		# Do not leak the directory when the download or conversion fails.
+		shutil.rmtree(tmpdir, ignore_errors=True)
+		raise
+	# Drop the pre-conversion download so only the WAV is left for the caller to clean up.
+	if Path(audio_fp) != Path(wav_audio_fp):
+		Path(audio_fp).unlink(missing_ok=True)
+	# Return path to the WAV file
+	return wav_audio_fp
+#---------------------
+# Main Function
+#---------------------
+def load(path, sr=None, trim=None, ch=None):
+	"""
+	Lightweight audio loader using imageio-ffmpeg.
+	The source is decoded to 16-bit PCM by the bundled ffmpeg binary
+	and returned as a float32 array scaled by 1/32768.
+	Parameters
+	----------
+	path: PathLike/str/URL
+		- Path to the audio file / YouTube video
+		- Anything whose string form contains ".youtube" is treated as a URL.
+	sr: int
+		- Sampling rate to load the audio in.
+		- Default: None => Use the original sampling rate
+	trim: number | tuple[number, number]
+		- (start, end) in seconds to trim the audio clip.
+		- A single number N is shorthand for (0, N).
+		- Default: None => No trimming
+	ch: int
+		- 1 for mono and 2 for stereo
+		- Default: None => Use the original number of channels.
+	Returns
+	-------
+	np.ndarray
+		- Audio signal Float32 waveform in [-1, 1].
+		- Shape (n_samples,) for one channel, (ch, n_samples) otherwise.
+	int:
+		Sampling rate.
+	str:
+		File name stem.
+	Raises
+	------
+	ConnectionRefusedError
+		- The YouTube download / conversion failed.
+	RuntimeError
+		- The metadata could not be parsed, or ffmpeg failed to decode the source.
+	Exception
+		- The probed sampling rate or channel count is outside the accepted range.
+	"""
+	import shutil
+	ffmpeg_exe = ffmpeg.get_ffmpeg_exe()
+	# Test the raw string: Path() collapses the "//" of "https://",
+	# which would corrupt the URL handed to the downloader.
+	source = str(path)
+	yt = ".youtube" in source # Is the path a youtube URL
+	if yt:
+		try:
+			path = Path(_load_audio_from_youtube(url=source))
+		except Exception as e:
+			raise ConnectionRefusedError("unable to download from YouTube") from e
+	else:
+		path = Path(path)
+	try:
+		# Probe the file once and fill in whichever of sr / ch was not given
+		if sr is None or ch is None:
+			probed_sr, probed_ch = _get_audio_info_ffmpeg(path)
+			if sr is None:
+				sr = probed_sr
+				if not (100 < sr < 80000):
+					raise Exception(f"Error reading the metadata for original sampling rate {sr}, please set `sr` explicitly")
+			if ch is None:
+				ch = probed_ch
+				if ch not in [1, 2]:
+					raise Exception(f"Error reading the metadata for number of channels {ch}, please set `ch` explicitly")
+		# -nostdin: never wait on the terminal; -loglevel error: keep stderr to real failures
+		cmd = [ffmpeg_exe, "-nostdin", "-loglevel", "error"]
+		# Optional trimming
+		if trim is not None:
+			if isinstance(trim, (int, float)):
+				trim = (0, trim)
+			start, end = trim
+			duration = end - start
+			cmd += ["-ss", str(start), "-t", str(duration)]
+		cmd += ["-i", str(path), "-f", "s16le", "-acodec", "pcm_s16le"]
+		cmd += ["-ar", str(sr)]
+		cmd += ["-ac", str(ch)]
+		cmd += ["-"]
+		# run() drains stdout and stderr together, so neither pipe can fill up and stall ffmpeg
+		proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+	finally:
+		# Delete the temporary download even when probing / decoding failed
+		if yt:
+			shutil.rmtree(path.parent, ignore_errors=True)
+	if proc.returncode != 0:
+		detail = proc.stderr.decode("utf-8", errors="replace").strip()
+		raise RuntimeError(f"ffmpeg could not decode {path}: {detail[-500:]}")
+	raw = proc.stdout
+	# Keep whole frames only (2 bytes per sample, ch samples per frame) so the reshape cannot fail
+	frame_bytes = 2 * ch
+	usable = len(raw) - len(raw) % frame_bytes
+	audio = np.frombuffer(memoryview(raw)[:usable], np.int16).astype(np.float32) / 32768.0
+	# De-interleave into (channels, samples)
+	if ch > 1:
+		audio = audio.reshape(-1, ch).T
+	return audio, sr, path.stem

modusa-0.4.24/tests/.DS_Store ADDED Viewed

Binary file

modusa-0.4.24/tests/test_load.py ADDED Viewed

@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+#---------------------------------
+# Author: Ankit Anand
+# Date: 21/10/25
+# Email: ankit0.anand0@gmail.com
+#---------------------------------
+import pytest
+import modusa as ms
+from pathlib import Path
+#----------------------------------
+# Loading different audio formats
+#----------------------------------
+this_dir = Path(__file__).parents[0].resolve()
+def test_load_aac():
+	"""AAC decodes to a non-empty signal at its native rate."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.aac")
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_aiff():
+	"""AIFF decodes to a non-empty signal at its native rate."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.aiff")
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_flac():
+	"""FLAC decodes to a non-empty signal at its native rate."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.flac")
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_m4a():
+	"""M4A decodes to a non-empty signal at its native rate."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.m4a")
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_mp3():
+	"""MP3 decodes to a non-empty signal at its native rate."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.mp3")
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_opus():
+	"""Opus decodes to a non-empty signal at its native rate."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.opus")
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_wav():
+	"""WAV decodes to a non-empty signal at its native rate."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.wav")
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+#----------------------------------
+# Resample feature
+#----------------------------------
+SR = 16000 # Hz
+def test_load_with_resample_aac():
+	"""AAC loaded with an explicit rate reports that rate and yields samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.aac", sr=SR)
+	assert sr == SR
+	assert y.size > 0
+	assert title == "sample"
+def test_load_with_resample_aiff():
+	"""AIFF loaded with an explicit rate reports that rate and yields samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.aiff", sr=SR)
+	assert sr == SR
+	assert y.size > 0
+	assert title == "sample"
+def test_load_with_resample_flac():
+	"""FLAC loaded with an explicit rate reports that rate and yields samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.flac", sr=SR)
+	assert sr == SR
+	assert y.size > 0
+	assert title == "sample"
+def test_load_with_resample_m4a():
+	"""M4A loaded with an explicit rate reports that rate and yields samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.m4a", sr=SR)
+	assert sr == SR
+	assert y.size > 0
+	assert title == "sample"
+def test_load_with_resample_mp3():
+	"""MP3 loaded with an explicit rate reports that rate and yields samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.mp3", sr=SR)
+	assert sr == SR
+	assert y.size > 0
+	assert title == "sample"
+def test_load_with_resample_opus():
+	"""Opus loaded with an explicit rate reports that rate and yields samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.opus", sr=SR)
+	assert sr == SR
+	assert y.size > 0
+	assert title == "sample"
+def test_load_with_resample_wav():
+	"""WAV loaded with an explicit rate reports that rate and yields samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.wav", sr=SR)
+	assert sr == SR
+	assert y.size > 0
+	assert title == "sample"
+#----------------------------------
+# Trim feature
+#----------------------------------
+def test_load_with_trim_aac():
+	"""Trimming AAC from the start still yields decoded samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.aac", trim=(0, 5.3))
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_with_trim_aiff():
+	"""Trimming AIFF from the start still yields decoded samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.aiff", trim=(0, 5.3))
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_with_trim_flac():
+	"""Trimming FLAC from the start still yields decoded samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.flac", trim=(0, 5.3))
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_with_trim_m4a():
+	"""Trimming M4A from the start still yields decoded samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.m4a", trim=(0, 5.3))
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_with_trim_mp3():
+	"""Trimming MP3 from the start still yields decoded samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.mp3", trim=(0, 5.3))
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_with_trim_opus():
+	"""Trimming Opus from the start still yields decoded samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.opus", trim=(0, 5.3))
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+def test_load_with_trim_wav():
+	"""Trimming WAV from the start still yields decoded samples."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.wav", trim=(0, 5.3))
+	assert y.size > 0
+	assert sr > 0
+	assert title == "sample"
+#----------------------------------
+# Stereo feature
+#----------------------------------
+def test_load_in_stereo_aac():
+	"""AAC forced to stereo comes back as (2, n_samples)."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.aac", ch=2)
+	assert y.ndim == 2
+	assert y.shape[0] == 2
+	assert y.shape[1] > 0
+	assert title == "sample"
+def test_load_in_stereo_aiff():
+	"""AIFF forced to stereo comes back as (2, n_samples)."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.aiff", ch=2)
+	assert y.ndim == 2
+	assert y.shape[0] == 2
+	assert y.shape[1] > 0
+	assert title == "sample"
+def test_load_in_stereo_flac():
+	"""FLAC forced to stereo comes back as (2, n_samples)."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.flac", ch=2)
+	assert y.ndim == 2
+	assert y.shape[0] == 2
+	assert y.shape[1] > 0
+	assert title == "sample"
+def test_load_in_stereo_m4a():
+	"""M4A forced to stereo comes back as (2, n_samples)."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.m4a", ch=2)
+	assert y.ndim == 2
+	assert y.shape[0] == 2
+	assert y.shape[1] > 0
+	assert title == "sample"
+def test_load_in_stereo_mp3():
+	"""MP3 forced to stereo comes back as (2, n_samples)."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.mp3", ch=2)
+	assert y.ndim == 2
+	assert y.shape[0] == 2
+	assert y.shape[1] > 0
+	assert title == "sample"
+def test_load_in_stereo_opus():
+	"""Opus forced to stereo comes back as (2, n_samples)."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.opus", ch=2)
+	assert y.ndim == 2
+	assert y.shape[0] == 2
+	assert y.shape[1] > 0
+	assert title == "sample"
+def test_load_in_stereo_wav():
+	"""WAV forced to stereo comes back as (2, n_samples)."""
+	y, sr, title = ms.load(this_dir / "testdata/audio-formats/sample.wav", ch=2)
+	assert y.ndim == 2
+	assert y.shape[0] == 2
+	assert y.shape[1] > 0
+	assert title == "sample"
+#----------------------------------
+# Load from YouTube
+#----------------------------------
+def test_load_youtube_1():
+	"""A YouTube URL loaded in mono yields a 1-D, non-empty signal (needs network access)."""
+	y, sr, title = ms.load("https://www.youtube.com/watch?v=DIU_vmElPkU", ch=1)
+	# ch=1 skips the channel reshape, so the signal stays one-dimensional
+	assert y.ndim == 1
+	assert y.size > 0
+	assert sr > 0
+	assert isinstance(title, str)

modusa-0.4.24/tests/testdata/.DS_Store ADDED Viewed

Binary file

modusa-0.4.24/tests/testdata/audio-formats/sample.aac ADDED Viewed

Binary file

modusa-0.4.24/tests/testdata/audio-formats/sample.aiff ADDED Viewed

Binary file

modusa-0.4.24/tests/testdata/audio-formats/sample.flac ADDED Viewed

Binary file

modusa-0.4.24/tests/testdata/audio-formats/sample.m4a ADDED Viewed

Binary file

modusa-0.4.24/tests/testdata/audio-formats/sample.mp3 ADDED Viewed

Binary file

modusa-0.4.24/tests/testdata/audio-formats/sample.opus ADDED Viewed

Binary file

modusa-0.4.24/tests/testdata/audio-formats/sample.wav ADDED Viewed

Binary file

modusa-0.4.21/src/modusa/tools/audio_loader.py DELETED Viewed

@@ -1,109 +0,0 @@
-#!/usr/bin/env python3
-import soundfile as sf
-from scipy.signal import resample
-import numpy as np
-from pathlib import Path
-import tempfile
-from scipy.signal import resample
-from .youtube_downloader import download
-from .audio_converter import convert
-def load(path, sr=None, trim=None, mono=True):
-	"""
-	Loads audio file from various sources.
-	.. code-block:: python
-		import modusa as ms
-		audio_fp = ms.load(
-			"https://www.youtube.com/watch?v=lIpw9-Y_N0g",
-			sr = None, trim=(5, 10))
-	Parameters
-	----------
-	path: str
-		- Path to the audio file.
-		- YouTube URL.
-	sr: int | None
-		- Sampling rate to load the audio in.
-	trim: number | tuple[number, number] | None
-		- Segment of the audio to load.
-		- Example: 10 => First 10 seconds, (5, 10) => 5 to 10 seconds.
-		- Default: None => Entire audio.
-	mono: bool
-		- If True, loads the signal in mono.
-	Return
-	------
-	np.ndarray
-		- Audio signal.
-	int
-		- Sampling rate of the loaded audio signal.
-	title
-		- Title of the loaded audio.
-		- Filename without extension or YouTube title.
-	"""
-	# Check if the path is YouTube
-	if ".youtube." in str(path):
-		# Download the audio in temp directory using tempfile module
-		with tempfile.TemporaryDirectory() as tmpdir:
-			# Download
-			audio_fp: Path = download(url=path, content_type="audio", output_dir=Path(tmpdir))
-			# Convert the audio to ".wav" form for loading
-			wav_audio_fp: Path = convert(inp_audio_fp=audio_fp, output_audio_fp=audio_fp.with_suffix(".wav"))
-			# Load the audio in memory
-			audio_data, audio_sr = sf.read(wav_audio_fp)
-			title = audio_fp.stem
-	else:
-		# Check if the file exists
-		fp = Path(path)
-		if not fp.exists():
-			raise FileNotFoundError(f"{path} does not exist.")
-		# Load the audio in memory
-		audio_data, audio_sr = sf.read(fp)
-		title = fp.stem
-	# Convert to mono if requested and it's multi-channel
-	if mono and audio_data.ndim > 1:
-		audio_data = audio_data.mean(axis=1)
-	# Resample if needed
-	if sr is not None and audio_sr != sr:
-		n_samples = int(len(audio_data) * sr / audio_sr)
-		if audio_data.ndim == 1:
-			# Mono
-			audio_data = resample(audio_data, n_samples)
-		else:
-			# Stereo or multi-channel: resample each channel independently
-			audio_data = np.stack([
-				resample(audio_data[:, ch], n_samples)
-				for ch in range(audio_data.shape[1])
-			], axis=1)
-		audio_sr = sr
-	# Trim if requested
-	if trim is not None:
-		if isinstance(trim, (int, float)):
-			trim = (0, trim)
-		elif isinstance(trim, tuple) and len(trim) > 1:
-			trim = (trim[0], trim[1])
-		else:
-			raise ValueError(f"Invalid trim type or length: {type(trim)}, len={len(trim)}")
-		start = int(trim[0] * audio_sr)
-		end = int(trim[1] * audio_sr)
-		audio_data = audio_data[start:end]
-	# Clip to avoid out-of-range playback issues
-	if np.issubdtype(audio_data.dtype, np.floating):
-		audio_data = np.clip(audio_data, -1.0, 1.0)
-	return audio_data.T, audio_sr, title