PyPI - modusa - Versions diffs - 0.4.19__tar.gz → 0.4.21__tar.gz - Mend

modusa 0.4.19.tar.gz → 0.4.21.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

{modusa-0.4.19 → modusa-0.4.21}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: modusa
-Version: 0.4.19
+Version: 0.4.21
 Summary: A modular signal analysis python library.
 Author-Email: Ankit Anand <ankit0.anand0@gmail.com>
 License: MIT

{modusa-0.4.19 → modusa-0.4.21}/pyproject.toml RENAMED Viewed

@@ -17,7 +17,7 @@ dependencies = [
 ]
 requires-python = ">=3.11"
 readme = "README.md"
-version = "0.4.19"
+version = "0.4.21"
 [project.license]
 text = "MIT"

{modusa-0.4.19 → modusa-0.4.21}/src/modusa/__init__.py RENAMED Viewed

@@ -8,4 +8,4 @@ from modusa.tools import play, convert, record, save
 from modusa.tools import download
 from modusa.tools import load, load_ann
-__version__ = "0.4.19"
+__version__ = "0.4.21"

{modusa-0.4.19 → modusa-0.4.21}/src/modusa/tools/ann_loader.py RENAMED Viewed

@@ -6,7 +6,7 @@
 # Email: ankit0.anand0@gmail.com
 #---------------------------------
-def load_ann(path, clip=None):
+def load_ann(path, trim=None):
 	"""
 	Load annotation from audatity label
 	text file and also ctm file.
@@ -15,9 +15,9 @@ def load_ann(path, clip=None):
 	----------
 	path: str
 		- label text/ctm file path.
-	clip: tuple[number, number] | number | None
-		- Incase you clipped the audio signal, this parameter will help clip the annotation.
-		- If you clip the audio, say from (10, 20), set the clip to (10, 20).
+	trim: tuple[number, number] | number | None
+		- Incase you trimmed the audio signal, this parameter will help clip the annotation making sure that the timings are aligned to the trimmed audio.
+		- If you trimmed the audio, say from (10, 20), set the trim to (10, 20).
 		- Default: None
 	Returns
@@ -42,14 +42,14 @@ def load_ann(path, clip=None):
 	ann = [] # This will store the annotation
 	# Clipping the annotation to match with the clipped audio
-	if clip is not None:
+	if trim is not None:
 		# Map clip input to the right format
-		if isinstance(clip, int or float):
-			clip = (0, clip)
-		elif isinstance(clip, tuple) and len(clip) > 1:
-			clip = (clip[0], clip[1])
+		if isinstance(trim, int or float):
+			trim = (0, trim)
+		elif isinstance(trim, tuple) and len(trim) > 1:
+			trim = (trim[0], trim[1])
 		else:
-			raise ValueError(f"Invalid clip type or length: {type(clip)}, len={len(clip)}")
+			raise ValueError(f"Invalid clip type or length: {type(trim)}, len={len(trim)}")
 	if path.suffix == ".txt":
 		with open(str(path), "r") as f:
@@ -60,11 +60,11 @@ def load_ann(path, clip=None):
 				# Incase user has clipped the audio signal, we adjust the annotation
 				# to match the clipped audio
-				if clip is not None:
-					offset = clip[0]
+				if trim is not None:
+					offset = trim[0]
 					# Clamp annotation to clip boundaries
-					new_start = max(start, clip[0]) - offset
-					new_end   = min(end, clip[1]) - offset
+					new_start = max(start, trim[0]) - offset
+					new_end   = min(end, trim[1]) - offset
 					# only keep if there's still overlap
 					if new_start < new_end:
@@ -89,11 +89,11 @@ def load_ann(path, clip=None):
 				# Incase user has clipped the audio signal, we adjust the annotation
 				# to match the clipped audio
-				if clip is not None:
-					offset = clip[0]
+				if trim is not None:
+					offset = trim[0]
 					# Clamp annotation to clip boundaries
-					new_start = max(start, clip[0]) - offset
-					new_end   = min(end, clip[1]) - offset
+					new_start = max(start, trim[0]) - offset
+					new_end   = min(end, trim[1]) - offset
 					# only keep if there's still overlap
 					if new_start < new_end:

{modusa-0.4.19 → modusa-0.4.21}/src/modusa/tools/audio_loader.py RENAMED Viewed

@@ -3,14 +3,14 @@
 import soundfile as sf
 from scipy.signal import resample
+import numpy as np
 from pathlib import Path
 import tempfile
 from scipy.signal import resample
 from .youtube_downloader import download
 from .audio_converter import convert
-def load(path, sr=None, clip=None):
+def load(path, sr=None, trim=None, mono=True):
 	"""
 	Loads audio file from various sources.
@@ -19,19 +19,21 @@ def load(path, sr=None, clip=None):
 		import modusa as ms
 		audio_fp = ms.load(
 			"https://www.youtube.com/watch?v=lIpw9-Y_N0g",
-			sr = None, clip=(5, 10))
+			sr = None, trim=(5, 10))
 	Parameters
 	----------
 	path: str
-		- Path to the audio
-		- Youtube URL
+		- Path to the audio file.
+		- YouTube URL.
 	sr: int | None
 		- Sampling rate to load the audio in.
-	clip: number | tuple[number, number] | None
-		- Which segment of the audio you want.
-		- Eg., 10 => First 10 sec, (5, 10) => 5 to 10 second
+	trim: number | tuple[number, number] | None
+		- Segment of the audio to load.
+		- Example: 10 => First 10 seconds, (5, 10) => 5 to 10 seconds.
 		- Default: None => Entire audio.
+	mono: bool
+		- If True, loads the signal in mono.
 	Return
 	------
@@ -43,7 +45,6 @@ def load(path, sr=None, clip=None):
 		- Title of the loaded audio.
 		- Filename without extension or YouTube title.
 	"""
 	# Check if the path is YouTube
 	if ".youtube." in str(path):
 		# Download the audio in temp directory using tempfile module
@@ -57,50 +58,52 @@ def load(path, sr=None, clip=None):
 			# Load the audio in memory
 			audio_data, audio_sr = sf.read(wav_audio_fp)
 			title = audio_fp.stem
-			# Convert to mono if it's multi-channel
-			if audio_data.ndim > 1:
-				audio_data = audio_data.mean(axis=1)
-			# Resample if needed
-			if sr is not None:
-				if audio_sr != sr:
-					n_samples = int(len(audio_data) * sr / audio_sr)
-					audio_data = resample(audio_data, n_samples)
-					audio_sr = sr
 	else:
 		# Check if the file exists
 		fp = Path(path)
 		if not fp.exists():
 			raise FileNotFoundError(f"{path} does not exist.")
 		# Load the audio in memory
 		audio_data, audio_sr = sf.read(fp)
 		title = fp.stem
-		# Convert to mono if it's multi-channel
-		if audio_data.ndim > 1:
-			audio_data = audio_data.mean(axis=1)
+	# Convert to mono if requested and it's multi-channel
+	if mono and audio_data.ndim > 1:
+		audio_data = audio_data.mean(axis=1)
+	# Resample if needed
+	if sr is not None and audio_sr != sr:
+		n_samples = int(len(audio_data) * sr / audio_sr)
+		if audio_data.ndim == 1:
+			# Mono
+			audio_data = resample(audio_data, n_samples)
+		else:
+			# Stereo or multi-channel: resample each channel independently
+			audio_data = np.stack([
+				resample(audio_data[:, ch], n_samples)
+				for ch in range(audio_data.shape[1])
+			], axis=1)
-		# Resample if needed
-		if sr is not None:
-			if audio_sr != sr:
-				n_samples = int(len(audio_data) * sr / audio_sr)
-				audio_data = resample(audio_data, n_samples)
-				audio_sr = sr
-	# Clip the audio signal as per needed
-	if clip is not None:
-		# Map clip input to the right format
-		if isinstance(clip, int or float):
-			clip = (0, clip)
-		elif isinstance(clip, tuple) and len(clip) > 1:
-			clip = (clip[0], clip[1])
+		audio_sr = sr
+	# Trim if requested
+	if trim is not None:
+		if isinstance(trim, (int, float)):
+			trim = (0, trim)
+		elif isinstance(trim, tuple) and len(trim) > 1:
+			trim = (trim[0], trim[1])
 		else:
-			raise ValueError(f"Invalid clip type or length: {type(clip)}, len={len(clip)}")
+			raise ValueError(f"Invalid trim type or length: {type(trim)}, len={len(trim)}")
+		start = int(trim[0] * audio_sr)
+		end = int(trim[1] * audio_sr)
+		audio_data = audio_data[start:end]
+	# Clip to avoid out-of-range playback issues
+	if np.issubdtype(audio_data.dtype, np.floating):
+		audio_data = np.clip(audio_data, -1.0, 1.0)
-		audio_data = audio_data[int(clip[0]*audio_sr):int(clip[1]*audio_sr)]
-	return audio_data, audio_sr, title
+	return audio_data.T, audio_sr, title

{modusa-0.4.19 → modusa-0.4.21}/src/modusa/tools/audio_player.py RENAMED Viewed

@@ -1,28 +1,31 @@
 #!/usr/bin/env python3
-from IPython.display import Audio, HTML, display
-import numpy as np
-from IPython.display import Audio, HTML, display
-import numpy as np
-from pathlib import Path
 from IPython.display import Audio, HTML, display
 from pathlib import Path
 import numpy as np
 import base64
-def play(
-	y: np.ndarray,
-	sr: float,
-	clip: tuple[float, float] | None = None,
-	label: str | None = None,
-) -> None:
+def play(y, sr: float, clip=None, label=None):
 	"""
-	Audio player with optional clip selection, transcription-style label,
-	and an embedded bottom-right logo (../images/icon.png).
+	Audio player with optional clip selection, transcription-style label.
+	Parameters
+	----------
+	y: ndarray
+		- Audio signal.
+	sr: float
+		- Sampling rate.
+	clip: tuple[float, float] | None
+		- The portion from the audio signal to be played.
+	label: str | None
+		- Could be transcription/labels attached to the audio.
+	Returns
+	-------
+	None
 	"""
-	start_time, end_time = 0.0, len(y) / sr
+	start_time = 0.0
+	end_time = len(y) / sr if y.ndim < 2 else y[0].size / sr
 	# Optional clip selection
 	if clip is not None:
@@ -91,7 +94,7 @@ def play(
 		box-shadow:0 1px 3px rgba(0,0,0,0.05);
 	">
 		{label_html}
-		<div style="margin-top:10px;">
+		<div style="margin-top:10px; margin-bottom:10px">
 			{audio_html}
 		</div>
 		{logo_html}

{modusa-0.4.19 → modusa-0.4.21}/src/modusa/tools/plotter.py RENAMED Viewed

@@ -209,6 +209,10 @@ class Fig:
 		axs[-1, 0].tick_params(bottom=True, labelbottom=True)
+		# Add the figure title on top-left (if any)
+		if self._fig_num is not None:
+			fig.suptitle(f'fig - {self._fig_num}', fontsize=12, fontweight='bold', x=0.01, ha='left', va='top', y=0.98)
 		# xlim should be applied on reference subplot, rest all subplots will automatically adjust
 		if xlim is not None:
 			axs[0, 0].set_xlim(xlim)