orbit-forensics 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,853 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ ORBIT AI Audio Forensics Script
4
+
5
+ Runs the deep signal-level spectral forensics suite to detect artificial acoustic anomalies:
6
+ - 16kHz rolloff cutoff
7
+ - Phase entropy (instant group delay Shannon entropy)
8
+ - Cepstral checkerboard artifacts (upsampling periodic vocoder artifacts)
9
+ - M/S stereo phase coherence
10
+ - Pre-echo transient ratios
11
+ - Harmonicity ratios (including high-frequency anomalous harmonics)
12
+ - Timing & onset regularity (quantization check)
13
+ - Timbral/spectral evolution variance (flux, centroid, bandwidth, ZCR, MFCCs)
14
+ - Demucs stem-aware isolation diagnostics
15
+
16
+ Usage:
17
+ python scripts/audio_forensics.py <audio_path> [--max-length 120] [--stems-dir <dir>]
18
+ """
19
+
20
+ import sys
21
+ import os
22
+ import json
23
+ import argparse
24
+ import warnings
25
+
26
+ # Suppress warnings for cleaner JSON output
27
+ warnings.filterwarnings('ignore')
28
+
29
+
30
def check_dependencies():
    """Exit with a JSON error report if a required analysis package is missing.

    Attempts to import ``librosa``, ``numpy`` and ``scipy.stats`` in that
    order. When at least one import fails, a JSON object listing the missing
    packages (and a matching ``pip install`` command) is printed to stdout and
    the process exits with status 1. Returns ``None`` when all imports work.
    """
    # (module to import, package name reported to the user)
    required = (
        ('librosa', 'librosa'),
        ('numpy', 'numpy'),
        ('scipy.stats', 'scipy'),
    )

    unavailable = []
    for module_name, package_name in required:
        try:
            __import__(module_name)
        except ImportError:
            unavailable.append(package_name)

    if not unavailable:
        return

    report = {
        'error': 'missing_dependencies',
        'message': f'Missing Python packages for forensics: {", ".join(unavailable)}',
        'install': f'pip install {" ".join(unavailable)}',
    }
    print(json.dumps(report))
    sys.exit(1)
53
+
54
+
55
+ # =========================================================================
56
+ # CLASSICAL FORENSICS ENGINES
57
+ # =========================================================================
58
+
59
def detect_spectral_cutoff(y, sr, n_fft=4096):
    """Compare average spectral magnitude above and below 16 kHz.

    Args:
        y: Audio signal passed to ``librosa.stft``.
        sr: Sample rate in Hz.
        n_fft: FFT size used for the STFT.

    Returns:
        ``{'available': False, 'reason': ...}`` when the Nyquist frequency is
        below 18 kHz. Otherwise a dict with the mean magnitude of the
        100 Hz-16 kHz band, the mean magnitude of the 16-20 kHz band, their
        ratio, and ``has_16k_cutoff`` (true when the ratio is below 0.005).
    """
    import librosa
    import numpy as np

    if sr / 2 < 18000:
        return {'available': False, 'reason': f'sample_rate {sr} too low (need >= 44100)'}

    magnitude = np.abs(librosa.stft(y, n_fft=n_fft))
    bin_freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
    # Time-averaged magnitude per frequency bin.
    avg_spectrum = np.mean(magnitude, axis=1)

    def _mean_in_band(f_lo, f_hi):
        selected = (bin_freqs >= f_lo) & (bin_freqs < f_hi)
        if not selected.any():
            return 0.0
        return float(np.mean(avg_spectrum[selected]))

    low_band = _mean_in_band(100, 16000)
    high_band = _mean_in_band(16000, 20000)
    # Small epsilon keeps the division defined for a silent low band.
    hf_ratio = high_band / (low_band + 1e-10)

    return {
        'available': True,
        'has_16k_cutoff': hf_ratio < 0.005,
        'energy_ratio_above_16k': round(hf_ratio, 6),
        'energy_below_16k': round(low_band, 6),
        'energy_16k_to_20k': round(high_band, 6),
    }
90
+
91
+
92
def measure_phase_entropy(y, sr, n_fft=2048):
    """Measure the Shannon entropy of frame-to-frame STFT phase differences.

    Up to 64 evenly spaced frequency bins are sampled. For each one, the
    phase differences between consecutive frames are binned into a 64-bin
    histogram over [-pi, pi] and its entropy (in bits) is computed.

    Args:
        y: Audio signal passed to ``librosa.stft``.
        sr: Sample rate in Hz (not used by the computation).
        n_fft: FFT size used for the STFT.

    Returns:
        Dict with the mean and standard deviation of the per-bin entropies,
        the mean divided by 6.0 (``normalized_entropy``), and ``low_entropy``
        (true when the mean is below 3.5).
    """
    import librosa
    import numpy as np

    # NOTE(review): differences are not re-wrapped, so values outside
    # [-pi, pi] fall outside the histogram range and are not counted.
    phase_steps = np.diff(np.angle(librosa.stft(y, n_fft=n_fft)), axis=1)

    total_bins = phase_steps.shape[0]
    chosen_bins = np.linspace(0, total_bins - 1, min(total_bins, 64), dtype=int)

    def _row_entropy(values):
        counts, _ = np.histogram(values, bins=64, range=(-np.pi, np.pi))
        # Epsilon avoids log2(0) for empty histogram cells.
        probs = counts.astype(np.float64) + 1e-10
        probs /= probs.sum()
        return -np.sum(probs * np.log2(probs))

    per_bin = [_row_entropy(phase_steps[idx]) for idx in chosen_bins]

    entropy_mean = float(np.mean(per_bin))
    entropy_std = float(np.std(per_bin))

    return {
        'mean_entropy': round(entropy_mean, 4),
        'std_entropy': round(entropy_std, 4),
        'normalized_entropy': round(entropy_mean / 6.0, 4),
        'low_entropy': entropy_mean < 3.5,
    }
124
+
125
+
126
def measure_spectral_contrast(y, sr):
    """Summarize librosa's spectral contrast and spectral flatness features.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        Dict with the mean and standard deviation of the 6-band spectral
        contrast, the mean spectral flatness, and two flags:
        ``low_contrast`` (mean contrast < 15.0) and ``high_flatness``
        (mean flatness > 0.05).
    """
    import librosa
    import numpy as np

    contrast_frames = librosa.feature.spectral_contrast(y=y, sr=sr, n_bands=6)
    flatness_frames = librosa.feature.spectral_flatness(y=y)

    contrast_mean = float(np.mean(contrast_frames))
    contrast_std = float(np.std(contrast_frames))
    flatness_mean = float(np.mean(flatness_frames))

    summary = {
        'mean_contrast_db': round(contrast_mean, 4),
        'std_contrast_db': round(contrast_std, 4),
        'mean_flatness': round(flatness_mean, 6),
    }
    summary['low_contrast'] = contrast_mean < 15.0
    summary['high_flatness'] = flatness_mean > 0.05
    return summary
145
+
146
+
147
def measure_onset_regularity(y, sr):
    """Compute the coefficient of variation of inter-onset intervals.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        ``{'available': False, 'reason': 'too_few_onsets'}`` when fewer than
        four onsets are detected. Otherwise a dict with the onset count, the
        mean and standard deviation of the inter-onset intervals (seconds),
        their coefficient of variation, and ``metronomic`` (CV < 0.15).
    """
    import librosa
    import numpy as np

    detected = librosa.onset.onset_detect(y=y, sr=sr, units='frames')
    if len(detected) < 4:
        return {'available': False, 'reason': 'too_few_onsets'}

    intervals = np.diff(librosa.frames_to_time(detected, sr=sr))
    interval_mean = float(np.mean(intervals))
    interval_std = float(np.std(intervals))

    if interval_mean > 0:
        variation = interval_std / interval_mean
    else:
        variation = 0

    return {
        'available': True,
        'onset_count': len(detected),
        'mean_ioi': round(interval_mean, 4),
        'std_ioi': round(interval_std, 4),
        'coefficient_of_variation': round(variation, 4),
        'metronomic': variation < 0.15,
    }
172
+
173
+
174
def measure_harmonicity(y, sr=44100):
    """Estimate the harmonic share of the signal after HPSS separation.

    The signal is split with ``librosa.effects.hpss``. The overall ratio uses
    the mean absolute amplitude of each component. When ``sr`` is at least
    24000, a second ratio is computed from STFT magnitudes at or above 12 kHz.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        ``{'available': False, 'reason': 'low_energy'}`` when the combined
        component energy is at most 1e-10. Otherwise a dict with
        ``harmonic_ratio`` and, for ``sr >= 24000``, ``hf_harmonic_ratio``
        plus ``hf_anomalous`` (high-frequency ratio > 0.7).
    """
    import librosa
    import numpy as np

    harmonic_part, percussive_part = librosa.effects.hpss(y)
    harmonic_level = float(np.mean(np.abs(harmonic_part)))
    percussive_level = float(np.mean(np.abs(percussive_part)))
    combined_level = harmonic_level + percussive_level
    if combined_level <= 1e-10:
        return {'available': False, 'reason': 'low_energy'}

    summary = {
        'available': True,
        'harmonic_ratio': round(harmonic_level / combined_level, 4),
    }
    if sr < 24000:
        # No bins at or above 12 kHz can exist below this sample rate.
        return summary

    fft_size = 4096
    harmonic_mag = np.abs(librosa.stft(harmonic_part, n_fft=fft_size))
    percussive_mag = np.abs(librosa.stft(percussive_part, n_fft=fft_size))
    above_12k = librosa.fft_frequencies(sr=sr, n_fft=fft_size) >= 12000

    if above_12k.any():
        hf_harmonic = float(np.mean(harmonic_mag[above_12k]))
        hf_percussive = float(np.mean(percussive_mag[above_12k]))
    else:
        hf_harmonic = 0.0
        hf_percussive = 0.0

    hf_combined = hf_harmonic + hf_percussive
    if hf_combined > 1e-10:
        hf_share = round(hf_harmonic / hf_combined, 4)
    else:
        hf_share = 0.0

    summary['hf_harmonic_ratio'] = hf_share
    summary['hf_anomalous'] = hf_share > 0.7
    return summary
207
+
208
+
209
def measure_crest_factor(y):
    """Compute the crest factor (peak amplitude divided by RMS) of a signal.

    Args:
        y: Audio samples as a NumPy array or any array-like of numbers.

    Returns:
        ``{'available': False, 'reason': 'silent'}`` when the input is empty
        or its RMS is at most 1e-10. Otherwise a dict with ``crest_factor``
        (rounded to 4 decimals) and ``low_crest`` (crest factor < 4.0).
    """
    import numpy as np

    samples = np.asarray(y)
    # An empty signal has no peak: np.max would raise on a zero-size array,
    # so report it the same way as silence instead of crashing.
    if samples.size == 0:
        return {'available': False, 'reason': 'silent'}

    rms = float(np.sqrt(np.mean(samples ** 2)))
    if rms <= 1e-10:
        return {'available': False, 'reason': 'silent'}

    peak = float(np.max(np.abs(samples)))
    crest = peak / rms
    return {
        'available': True,
        'crest_factor': round(crest, 4),
        'low_crest': crest < 4.0,
    }
223
+
224
+
225
def measure_spectral_centroid_variance(y, sr):
    """Summarize how much the spectral centroid varies over time.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        ``{'available': False, 'reason': 'insufficient_frames'}`` for fewer
        than two frames. Otherwise a dict with the centroid mean, standard
        deviation, coefficient of variation, and ``low_variance`` (CV < 0.30).
    """
    import librosa
    import numpy as np

    centroid_track = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    if len(centroid_track) < 2:
        return {'available': False, 'reason': 'insufficient_frames'}

    centre = float(np.mean(centroid_track))
    spread = float(np.std(centroid_track))
    if centre > 0:
        variation = spread / centre
    else:
        variation = 0.0

    return {
        'available': True,
        'mean': round(centre, 2),
        'std': round(spread, 2),
        'cv': round(variation, 4),
        'low_variance': variation < 0.30,
    }
243
+
244
+
245
def measure_spectral_bandwidth_variance(y, sr):
    """Summarize how much the spectral bandwidth varies over time.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        ``{'available': False, 'reason': 'insufficient_frames'}`` for fewer
        than two frames. Otherwise a dict with the bandwidth mean, standard
        deviation, coefficient of variation, and ``low_variance`` (CV < 0.25).
    """
    import librosa
    import numpy as np

    bandwidth_track = librosa.feature.spectral_bandwidth(y=y, sr=sr)[0]
    if len(bandwidth_track) < 2:
        return {'available': False, 'reason': 'insufficient_frames'}

    centre = float(np.mean(bandwidth_track))
    spread = float(np.std(bandwidth_track))
    if centre > 0:
        variation = spread / centre
    else:
        variation = 0.0

    return {
        'available': True,
        'mean': round(centre, 2),
        'std': round(spread, 2),
        'cv': round(variation, 4),
        'low_variance': variation < 0.25,
    }
263
+
264
+
265
def measure_spectral_rolloff(y, sr):
    """Compare the 85% and 95% spectral rolloff frequencies.

    ``steepness`` is the gap between the mean 95% and mean 85% rolloff
    frequencies, relative to the mean 95% rolloff.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        ``{'available': False, 'reason': 'insufficient_frames'}`` for fewer
        than two frames. Otherwise a dict with both mean rolloffs, the
        standard deviation of the 85% rolloff, ``steepness``, and
        ``steep_rolloff`` (steepness < 0.15).
    """
    import librosa
    import numpy as np

    track_85 = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.85)[0]
    track_95 = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.95)[0]
    if len(track_85) < 2:
        return {'available': False, 'reason': 'insufficient_frames'}

    avg_85 = float(np.mean(track_85))
    avg_95 = float(np.mean(track_95))
    # Epsilon guards against a zero 95% rolloff.
    relative_gap = (avg_95 - avg_85) / (avg_95 + 1e-10)

    return {
        'available': True,
        'mean_rolloff_85': round(avg_85, 2),
        'mean_rolloff_95': round(avg_95, 2),
        'std_rolloff_85': round(float(np.std(track_85)), 2),
        'steepness': round(relative_gap, 4),
        'steep_rolloff': relative_gap < 0.15,
    }
285
+
286
+
287
def measure_spectral_flux(y, sr, n_fft=2048):
    """Measure the variability of spectral flux across STFT frames.

    Flux for each frame transition is the root-mean-square, over frequency
    bins, of the difference between consecutive magnitude spectra.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz (not used by the computation).
        n_fft: FFT size used for the STFT.

    Returns:
        ``{'available': False, 'reason': 'insufficient_frames'}`` for fewer
        than three frames. Otherwise a dict with the flux mean, standard
        deviation, coefficient of variation, and ``low_flux_variance``
        (CV < 0.55).
    """
    import librosa
    import numpy as np

    magnitude = np.abs(librosa.stft(y, n_fft=n_fft))
    if magnitude.shape[1] < 3:
        return {'available': False, 'reason': 'insufficient_frames'}

    frame_deltas = np.diff(magnitude, axis=1)
    flux_track = np.sqrt(np.mean(frame_deltas ** 2, axis=0))

    flux_mean = float(np.mean(flux_track))
    flux_std = float(np.std(flux_track))
    if flux_mean > 0:
        variation = flux_std / flux_mean
    else:
        variation = 0.0

    return {
        'available': True,
        'mean_flux': round(flux_mean, 6),
        'std_flux': round(flux_std, 6),
        'cv': round(variation, 4),
        'low_flux_variance': variation < 0.55,
    }
306
+
307
+
308
def measure_zcr_variance(y):
    """Summarize how much the zero-crossing rate varies over time.

    Args:
        y: Audio signal.

    Returns:
        ``{'available': False, 'reason': 'insufficient_frames'}`` for fewer
        than two frames. Otherwise a dict with the zero-crossing-rate mean,
        standard deviation, coefficient of variation, and ``low_variance``
        (CV < 0.45).
    """
    import librosa
    import numpy as np

    crossing_track = librosa.feature.zero_crossing_rate(y=y)[0]
    if len(crossing_track) < 2:
        return {'available': False, 'reason': 'insufficient_frames'}

    centre = float(np.mean(crossing_track))
    spread = float(np.std(crossing_track))
    if centre > 0:
        variation = spread / centre
    else:
        variation = 0.0

    return {
        'available': True,
        'mean_zcr': round(centre, 6),
        'std_zcr': round(spread, 6),
        'cv': round(variation, 4),
        'low_variance': variation < 0.45,
    }
326
+
327
+
328
def measure_mfcc_temporal_stats(y, sr, n_mfcc=13):
    """Compute the variance over time of each MFCC coefficient.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.
        n_mfcc: Number of MFCC coefficients to extract.

    Returns:
        ``{'available': False, 'reason': 'insufficient_frames'}`` for fewer
        than four frames. Otherwise a dict with the per-coefficient
        variances, their mean, and ``low_variance`` (mean variance < 700.0).
    """
    import librosa
    import numpy as np

    coefficients = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    if coefficients.shape[1] < 4:
        return {'available': False, 'reason': 'insufficient_frames'}

    # One variance per coefficient, taken along the frame axis.
    per_coeff = np.var(coefficients, axis=1)
    overall = float(np.mean(per_coeff))

    return {
        'available': True,
        'mean_variance': round(overall, 4),
        'per_coeff_variance': [round(float(value), 4) for value in per_coeff],
        'low_variance': overall < 700.0,
    }
344
+
345
+
346
def measure_chroma_entropy(y, sr):
    """Measure the per-frame Shannon entropy of the chroma distribution.

    Each chroma frame is normalized to sum to (approximately) one and its
    base-2 entropy is computed. The mean is normalized by ``log2(12)``.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        ``{'available': False, 'reason': 'insufficient_frames'}`` for fewer
        than two frames. Otherwise a dict with the entropy mean, standard
        deviation, normalized mean, and ``low_entropy`` (normalized < 0.88).
    """
    import librosa
    import numpy as np
    from scipy.stats import entropy

    chroma_frames = librosa.feature.chroma_cqt(y=y, sr=sr)
    frame_count = chroma_frames.shape[1]
    if frame_count < 2:
        return {'available': False, 'reason': 'insufficient_frames'}

    def _frame_entropy(column):
        # Epsilon keeps the normalization defined for an all-zero frame.
        return entropy(column / (column.sum() + 1e-10), base=2)

    per_frame = [_frame_entropy(chroma_frames[:, idx]) for idx in range(frame_count)]

    entropy_mean = float(np.mean(per_frame))
    entropy_std = float(np.std(per_frame))
    relative = entropy_mean / np.log2(12)

    return {
        'available': True,
        'mean_entropy': round(entropy_mean, 4),
        'std_entropy': round(entropy_std, 4),
        'normalized': round(relative, 4),
        'low_entropy': relative < 0.88,
    }
371
+
372
+
373
def measure_energy_arc(y, sr, n_segments=8):
    """Measure how much mean RMS energy differs between equal-length segments.

    The frame-wise RMS track is cut into ``n_segments`` consecutive segments
    of equal length; trailing frames that do not fill a segment are ignored.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz (not used by the computation).
        n_segments: Number of segments to compare.

    Returns:
        ``{'available': False, 'reason': 'too_short'}`` when there are fewer
        RMS frames than segments. Otherwise a dict with the per-segment
        means, their variance and range, and ``flat_arc`` (variance < 0.0005).
    """
    import librosa
    import numpy as np

    rms_track = librosa.feature.rms(y=y)[0]
    if len(rms_track) < n_segments:
        return {'available': False, 'reason': 'too_short'}

    frames_per_segment = len(rms_track) // n_segments
    levels = [
        float(np.mean(rms_track[idx * frames_per_segment:(idx + 1) * frames_per_segment]))
        for idx in range(n_segments)
    ]

    level_variance = float(np.var(levels))
    level_span = max(levels) - min(levels)

    return {
        'available': True,
        'segment_means': [round(level, 6) for level in levels],
        'arc_variance': round(level_variance, 8),
        'arc_range': round(level_span, 6),
        'flat_arc': level_variance < 0.0005,
    }
396
+
397
+
398
def measure_checkerboard_artifacts(y, sr, n_fft=2048):
    """Look for a dominant peak in the cepstrum of the average log spectrum.

    The time-averaged log-magnitude spectrum is inverse-FFT'd, and the ratio
    of the largest to the mean absolute value in indices ``16 .. n//2`` of
    the result is reported.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz (not used by the computation).
        n_fft: FFT size used for the STFT.

    Returns:
        ``{'available': False, 'reason': 'insufficient_cepstrum_length'}``
        when the cepstrum has fewer than 64 points. Otherwise a dict with the
        peak ratio (reported under both ``cepstral_peak_ratio`` and
        ``pow2_peak_ratio``) and ``has_artifacts`` (ratio > 6.0).
    """
    import numpy as np
    import librosa

    # Epsilon keeps the logarithm finite for zero-magnitude bins.
    magnitude = np.abs(librosa.stft(y, n_fft=n_fft)) + 1e-10
    avg_log_spectrum = np.mean(np.log(magnitude), axis=1)

    cepstrum = np.real(np.fft.ifft(avg_log_spectrum))
    length = len(cepstrum)
    if length < 64:
        return {'available': False, 'reason': 'insufficient_cepstrum_length'}

    region = np.abs(cepstrum[16:length // 2])
    peak_ratio = np.max(region) / (np.mean(region) + 1e-10)
    reported_ratio = round(float(peak_ratio), 4)

    return {
        'available': True,
        'cepstral_peak_ratio': reported_ratio,
        'has_artifacts': peak_ratio > 6.0,
        'pow2_peak_ratio': reported_ratio,
    }
426
+
427
+
428
def measure_subband_energy_distribution(y, sr, n_fft=4096):
    """Measure how evenly spectral magnitude is spread over five sub-bands.

    The time-averaged magnitude spectrum is summed within the bands
    20-150 Hz, 150-500 Hz, 500-2000 Hz, 2-8 kHz and 8-20 kHz. The band sums
    are normalized to a distribution whose base-2 entropy is divided by the
    maximum possible entropy for five bands.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.
        n_fft: FFT size used for the STFT.

    Returns:
        ``{'available': False, 'reason': 'silent'}`` when the total is at
        most 1e-10. Otherwise a dict with ``distribution_entropy`` (the
        normalized entropy) and ``low_entropy`` (normalized < 0.78).
    """
    import librosa
    import numpy as np
    from scipy.stats import entropy

    magnitude = np.abs(librosa.stft(y, n_fft=n_fft))
    bin_freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)
    avg_spectrum = np.mean(magnitude, axis=1)

    band_edges = [
        (20, 150), (150, 500), (500, 2000), (2000, 8000), (8000, 20000)
    ]

    def _band_sum(f_lo, f_hi):
        selected = (bin_freqs >= f_lo) & (bin_freqs < f_hi)
        if not selected.any():
            return 0.0
        return float(np.sum(avg_spectrum[selected]))

    sums = np.array([_band_sum(f_lo, f_hi) for f_lo, f_hi in band_edges])
    grand_total = np.sum(sums)
    if grand_total <= 1e-10:
        return {'available': False, 'reason': 'silent'}

    raw_entropy = entropy(sums / grand_total, base=2)
    relative = raw_entropy / np.log2(len(band_edges))

    return {
        'available': True,
        'distribution_entropy': round(float(relative), 4),
        'low_entropy': relative < 0.78,
    }
461
+
462
+
463
def measure_pre_echo(y, sr):
    """Compare signal energy just before and just after detected onsets.

    For each of the first ten onsets that has a full 50 ms window on both
    sides, the RMS of the preceding window is divided by the RMS of the
    following window (only when the latter exceeds 1e-4). The mean slope of
    the onset-strength envelope of the preceding window is also collected.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        ``{'available': False, 'reason': ...}`` when fewer than two onsets
        are detected or no usable window pair exists. Otherwise a dict with
        ``mean_pre_echo_ratio``, ``positive_slope_ratio`` (the mean of the
        collected slopes, 0.0 if none) and ``has_pre_echo`` (ratio > 0.15).
    """
    import librosa
    import numpy as np

    strength = librosa.onset.onset_strength(y=y, sr=sr)
    onset_samples = librosa.onset.onset_detect(onset_envelope=strength, sr=sr, units='samples')
    if len(onset_samples) < 2:
        return {'available': False, 'reason': 'insufficient_onsets'}

    window = int(0.05 * sr)  # 50 ms on each side of the onset
    energy_ratios = []
    envelope_slopes = []

    for onset in onset_samples[:10]:
        window_start = onset - window
        window_end = onset + window
        # Skip onsets too close to either end for a full window pair.
        if window_start < 0 or window_end > len(y):
            continue

        before = y[window_start:onset]
        after = y[onset:window_end]

        rms_before = np.sqrt(np.mean(before ** 2))
        rms_after = np.sqrt(np.mean(after ** 2))
        if rms_after > 1e-4:
            energy_ratios.append(rms_before / rms_after)

        pre_envelope = librosa.onset.onset_strength(y=before, sr=sr)
        if len(pre_envelope) > 1:
            envelope_slopes.append(np.mean(np.diff(pre_envelope)))

    if not energy_ratios:
        return {'available': False, 'reason': 'insufficient_pre_windows'}

    ratio_mean = float(np.mean(energy_ratios))
    if envelope_slopes:
        slope_mean = float(np.mean(envelope_slopes))
    else:
        slope_mean = 0.0

    return {
        'available': True,
        'mean_pre_echo_ratio': round(ratio_mean, 4),
        'positive_slope_ratio': round(slope_mean, 4),
        'has_pre_echo': ratio_mean > 0.15,
    }
506
+
507
+
508
def measure_hf_phase_incoherence(y, sr, n_fft=4096):
    """Measure the variance of bin-to-bin phase differences above 12 kHz.

    The STFT phase of bins at or above 12 kHz is differenced along the
    frequency axis; the variance of those differences over time is averaged
    across bins.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.
        n_fft: FFT size used for the STFT.

    Returns:
        ``{'available': False, 'reason': 'sample_rate_too_low'}`` when no bin
        reaches 12 kHz. Otherwise a dict with ``mean_group_delay_variance``
        and ``hf_incoherent`` (mean variance > 2.5).
    """
    import librosa
    import numpy as np

    spectrum = librosa.stft(y, n_fft=n_fft)
    above_12k = librosa.fft_frequencies(sr=sr, n_fft=n_fft) >= 12000
    if not above_12k.any():
        return {'available': False, 'reason': 'sample_rate_too_low'}

    hf_phase = np.angle(spectrum[above_12k, :])
    # Difference between adjacent frequency bins, per frame.
    across_bins = np.diff(hf_phase, axis=0)
    variance_mean = float(np.mean(np.var(across_bins, axis=1)))

    return {
        'available': True,
        'mean_group_delay_variance': round(variance_mean, 4),
        'hf_incoherent': variance_mean > 2.5,
    }
529
+
530
+
531
def measure_ms_phase_coherence(y_stereo, sr, n_fft=2048):
    """Measure the side-channel share of STFT magnitude in two low bands.

    Mid and side signals are formed as half the sum and half the difference
    of the first two channels. For 20-100 Hz and 100-1000 Hz the summed side
    magnitude is divided by the summed mid-plus-side magnitude.

    Args:
        y_stereo: Array whose first axis indexes channels, or ``None``.
        sr: Sample rate in Hz.
        n_fft: FFT size used for the STFT.

    Returns:
        ``{'available': False, 'reason': 'mono_input'}`` when the input is
        ``None`` or has fewer than two channels. Otherwise a dict with both
        band ratios and ``ms_anomalous``, which is set when the sub-bass
        ratio exceeds 0.4, or exceeds 0.3 while the low-mid ratio is below
        0.15.
    """
    import librosa
    import numpy as np

    if y_stereo is None or y_stereo.ndim < 2 or y_stereo.shape[0] < 2:
        return {'available': False, 'reason': 'mono_input'}

    channel_l, channel_r = y_stereo[0], y_stereo[1]
    mid_mag = np.abs(librosa.stft(0.5 * (channel_l + channel_r), n_fft=n_fft))
    side_mag = np.abs(librosa.stft(0.5 * (channel_l - channel_r), n_fft=n_fft))
    bin_freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    def _side_share(f_lo, f_hi):
        band = (bin_freqs >= f_lo) & (bin_freqs < f_hi)
        if not band.any():
            return 0.0
        mid_sum = np.sum(mid_mag[band])
        side_sum = np.sum(side_mag[band])
        return float(side_sum / (mid_sum + side_sum + 1e-10))

    sub_bass_share = _side_share(20, 100)
    low_mid_share = _side_share(100, 1000)

    flagged = sub_bass_share > 0.4 or (low_mid_share < 0.15 and sub_bass_share > 0.3)

    return {
        'available': True,
        'sub_bass_sm_ratio': round(sub_bass_share, 4),
        'low_mid_sm_ratio': round(low_mid_share, 4),
        'ms_anomalous': flagged,
    }
570
+
571
+
572
def measure_pitch_jitter(y, sr):
    """Analyze frame-to-frame pitch movement and its modulation spectrum.

    The harmonic component (from HPSS) is pitch-tracked with ``librosa.pyin``
    between C2 and C7. Frames with a defined pitch and voicing probability
    above 0.6 are kept. The mean absolute pitch step is reported along with
    the peak-to-mean ratio of the FFT magnitude (DC bin excluded) of the
    mean-removed pitch track.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        ``{'available': False, 'reason': 'insufficient_voiced_content'}``
        when fewer than 64 frames are kept. Otherwise a dict with
        ``mean_pitch_step``, the spectral ratio (reported under both
        ``modulation_spectral_peak`` and ``mean_modulation_slope``) and
        ``perfect_vibrato`` (ratio > 5.0).
    """
    import librosa
    import numpy as np

    harmonic_part, _ = librosa.effects.hpss(y)
    pitch_track, _, voicing = librosa.pyin(
        harmonic_part, fmin=librosa.note_to_hz('C2'), fmax=librosa.note_to_hz('C7'), sr=sr
    )

    keep = np.logical_not(np.isnan(pitch_track)) & (voicing > 0.6)
    voiced_pitch = pitch_track[keep]
    if len(voiced_pitch) < 64:
        return {'available': False, 'reason': 'insufficient_voiced_content'}

    step_mean = np.mean(np.abs(np.diff(voiced_pitch)))

    # Magnitude spectrum of the pitch track with its mean removed; the DC
    # bin is dropped before comparing peak to average.
    centred = voiced_pitch - np.mean(voiced_pitch)
    modulation = np.abs(np.fft.rfft(centred))[1:]
    peak_to_mean = np.max(modulation) / (np.mean(modulation) + 1e-10)
    reported_ratio = round(float(peak_to_mean), 4)

    return {
        'available': True,
        'mean_pitch_step': round(float(step_mean), 4),
        'modulation_spectral_peak': reported_ratio,
        'mean_modulation_slope': reported_ratio,
        'perfect_vibrato': peak_to_mean > 5.0,
    }
603
+
604
+
605
def measure_noise_floor_structure(y, sr, n_fft=4096):
    """Look for periodic structure in the high-frequency percussive envelope.

    The percussive component (from HPSS) is transformed with an STFT, and
    the magnitude of bins at or above 15 kHz is averaged per frame. That
    envelope is autocorrelated and normalized by its zero-lag value; the
    largest value among lags 16..127 is reported.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.
        n_fft: FFT size used for the STFT.

    Returns:
        ``{'available': False, 'reason': 'sample_rate_too_low'}`` when no bin
        reaches 15 kHz, ``{'available': False, 'reason': 'insufficient_frames'}``
        when the envelope has no frames. Otherwise a dict with
        ``noise_floor_autocorr`` and ``structured_noise`` (peak > 0.35).
    """
    import librosa
    import numpy as np

    _, y_perc = librosa.effects.hpss(y)
    S = np.abs(librosa.stft(y_perc, n_fft=n_fft))
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    hf_mask = freqs >= 15000
    if not hf_mask.any():
        return {'available': False, 'reason': 'sample_rate_too_low'}

    hf_noise = np.mean(S[hf_mask, :], axis=0)
    if hf_noise.size == 0:
        # np.correlate rejects empty input; nothing to analyze.
        return {'available': False, 'reason': 'insufficient_frames'}

    # Autocorrelation of the high-frequency envelope. 'full' mode returns
    # 2n-1 lags centred on zero lag, so keep the non-negative half.
    acorr = np.correlate(hf_noise, hf_noise, mode='full')
    # NumPy arrays have no `.length` attribute; use len() for the midpoint.
    acorr = acorr[len(acorr) // 2:]
    # Normalize so that zero lag is (approximately) 1.
    acorr = acorr / (acorr[0] + 1e-10)

    # Skip the shortest lags and look for the strongest later peak.
    peaks = acorr[16:128]
    max_peak = float(np.max(peaks)) if len(peaks) > 0 else 0.0

    return {
        'available': True,
        'noise_floor_autocorr': round(max_peak, 4),
        'structured_noise': max_peak > 0.35,
    }
634
+
635
+
636
def measure_loop_repetition(y, sr):
    """Score repetition from the chroma self-similarity matrix.

    The frame-by-frame dot-product matrix of the chroma features is scaled
    by its maximum. The mean of each off-diagonal with offset from 4 up to
    (but excluding) half the frame count is computed, and the largest mean
    is the score.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        ``{'available': False, 'reason': 'too_short'}`` for fewer than eight
        frames. Otherwise a dict with ``repetition_score`` and
        ``high_loop_repetition`` (score > 0.65).
    """
    import librosa
    import numpy as np

    chroma_frames = librosa.feature.chroma_cqt(y=y, sr=sr)
    # Pairwise dot products between all frames.
    self_similarity = np.dot(chroma_frames.T, chroma_frames)
    frame_count = self_similarity.shape[0]
    if frame_count < 8:
        return {'available': False, 'reason': 'too_short'}

    self_similarity = self_similarity / (np.max(self_similarity) + 1e-10)

    diagonal_means = []
    for lag in range(4, frame_count // 2):
        diagonal_means.append(float(np.mean(np.diagonal(self_similarity, offset=lag))))

    if diagonal_means:
        best = max(diagonal_means)
    else:
        best = 0.0

    return {
        'available': True,
        'repetition_score': round(best, 4),
        'high_loop_repetition': best > 0.65,
    }
657
+
658
+
659
def measure_tempo_regularity(y, sr):
    """Score how steady the strongest tempogram response is over time.

    For every frame the maximum tempogram value is taken; ``cv`` is the
    coefficient of variation of that series and ``stability`` is ``1 - cv``.

    Args:
        y: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        ``{'available': False, 'reason': 'too_short'}`` for fewer than four
        tempogram frames. Otherwise a dict with ``stability`` and ``cv``.
    """
    import librosa
    import numpy as np

    strength = librosa.onset.onset_strength(y=y, sr=sr)
    tempo_map = librosa.feature.tempogram(onset_envelope=strength, sr=sr)
    if tempo_map.shape[1] < 4:
        return {'available': False, 'reason': 'too_short'}

    # Strongest tempogram response in each frame.
    strongest = np.max(tempo_map, axis=0)
    centre = np.mean(strongest)
    spread = np.std(strongest)
    if centre > 0:
        variation = spread / centre
    else:
        variation = 0.0

    return {
        'available': True,
        'stability': round(float(1.0 - variation), 4),
        'cv': round(float(variation), 4),
    }
680
+
681
+
682
+ # =========================================================================
683
+ # ORCHESTRATION PIPELINE
684
+ # =========================================================================
685
+
686
def run_forensics(audio_path, max_length_seconds=120, stems_dir=None):
    """Run every forensics measurement on an audio file and collect the results.

    The file is loaded at 44.1 kHz as mono, and additionally as multi-channel
    for the mid/side measurement (best effort). Both are truncated to
    ``max_length_seconds``. When ``stems_dir`` is an existing directory,
    ``vocals.wav``, ``drums.wav``, ``bass.wav`` and ``other.wav`` found in it
    are analyzed as well and reported under ``stem_forensics``.

    Args:
        audio_path: Path to the audio file to analyze.
        max_length_seconds: Maximum duration, in seconds, to analyze.
        stems_dir: Optional directory containing separated stem WAV files.

    Returns:
        Dict mapping each measurement name to that measurement's result dict.

    Raises:
        FileNotFoundError: If ``audio_path`` does not exist.
    """
    import librosa
    import numpy as np

    if not os.path.exists(audio_path):
        raise FileNotFoundError(f'Audio file not found: {audio_path}')

    target_sr = 44100
    y, sr = librosa.load(audio_path, sr=target_sr, mono=True)

    # The multi-channel load is optional: any failure simply disables the
    # mid/side measurement, which then reports 'mono_input'.
    y_stereo = None
    try:
        y_stereo_raw, _ = librosa.load(audio_path, sr=target_sr, mono=False)
    except Exception:
        y_stereo_raw = None
    if y_stereo_raw is not None and y_stereo_raw.ndim == 2 and y_stereo_raw.shape[0] >= 2:
        y_stereo = y_stereo_raw

    max_samples = int(max_length_seconds * sr)
    if len(y) > max_samples:
        y = y[:max_samples]
    if y_stereo is not None and y_stereo.shape[1] > max_samples:
        y_stereo = y_stereo[:, :max_samples]

    # Run full forensics suite
    forensics = {
        'spectral_cutoff': detect_spectral_cutoff(y, sr),
        'phase_entropy': measure_phase_entropy(y, sr),
        'spectral_contrast': measure_spectral_contrast(y, sr),
        'onset_regularity': measure_onset_regularity(y, sr),
        'harmonicity': measure_harmonicity(y, sr),
        'loop_repetition': measure_loop_repetition(y, sr),
        'tempo_regularity': measure_tempo_regularity(y, sr),
        'crest_factor': measure_crest_factor(y),
        'spectral_centroid_var': measure_spectral_centroid_variance(y, sr),
        'spectral_bandwidth_var': measure_spectral_bandwidth_variance(y, sr),
        'spectral_rolloff': measure_spectral_rolloff(y, sr),
        'spectral_flux': measure_spectral_flux(y, sr),
        'zcr_variance': measure_zcr_variance(y),
        'mfcc_temporal': measure_mfcc_temporal_stats(y, sr),
        'chroma_entropy': measure_chroma_entropy(y, sr),
        'energy_arc': measure_energy_arc(y, sr),
        'checkerboard': measure_checkerboard_artifacts(y, sr),
        'subband_energy': measure_subband_energy_distribution(y, sr),
        'pre_echo': measure_pre_echo(y, sr),
        'hf_phase_incoherence': measure_hf_phase_incoherence(y, sr),
        'ms_phase_coherence': measure_ms_phase_coherence(y_stereo, sr),
        'pitch_jitter': measure_pitch_jitter(y, sr),
        'noise_floor_structure': measure_noise_floor_structure(y, sr),
    }

    # Integrate Demucs stems if available
    if stems_dir and os.path.isdir(stems_dir):
        stem_forensics = {}

        def load_stem(stem_name):
            """Load ``<stem_name>.wav`` from stems_dir as truncated mono, or None."""
            stem_path = os.path.join(stems_dir, f'{stem_name}.wav')
            if not os.path.exists(stem_path):
                return None
            stem_y, _ = librosa.load(stem_path, sr=sr, mono=True)
            return stem_y[:max_samples] if len(stem_y) > max_samples else stem_y

        vocals_stem = load_stem('vocals')
        drums_stem = load_stem('drums')
        bass_stem = load_stem('bass')
        other_stem = load_stem('other')

        if vocals_stem is not None and len(vocals_stem) > 0:
            stem_forensics['vocal_spectral_cutoff'] = detect_spectral_cutoff(vocals_stem, sr)
            stem_forensics['vocal_phase_entropy'] = measure_phase_entropy(vocals_stem, sr)

        if drums_stem is not None and len(drums_stem) > 0:
            stem_forensics['drum_onset_regularity'] = measure_onset_regularity(drums_stem, sr)

        present_stems = [
            (stem_name, stem_y)
            for stem_name, stem_y in [('vocals', vocals_stem), ('drums', drums_stem),
                                      ('bass', bass_stem), ('other', other_stem)]
            if stem_y is not None and len(stem_y) > 0
        ]
        if present_stems:
            # NOTE(review): `calculate_dynamic_range` is not defined in this
            # module, so calling it directly raised NameError whenever a stem
            # was present. Resolve it at runtime and report the measurement
            # as unavailable when it is missing; restore the real helper to
            # get per-stem values back.
            dynamic_range_fn = globals().get('calculate_dynamic_range')
            if callable(dynamic_range_fn):
                stem_forensics['stem_dynamic_ranges'] = {
                    stem_name: dynamic_range_fn(stem_y)
                    for stem_name, stem_y in present_stems
                }
            else:
                stem_forensics['stem_dynamic_ranges'] = {
                    'available': False,
                    'reason': 'dynamic_range_helper_missing',
                }

        if vocals_stem is not None and len(vocals_stem) > 1000:
            # Absolute sample-wise correlation between vocals and each other
            # stem, over their common length.
            bleed_scores = []
            for stem_y in [drums_stem, bass_stem, other_stem]:
                if stem_y is None or len(stem_y) < 1000:
                    continue
                target_len = min(len(vocals_stem), len(stem_y))
                if target_len < 1000:
                    continue
                v = vocals_stem[:target_len]
                o = stem_y[:target_len]
                # Correlation is undefined for a constant signal.
                if float(np.std(v)) <= 1e-10 or float(np.std(o)) <= 1e-10:
                    continue
                corr = float(np.corrcoef(v, o)[0, 1])
                if not np.isnan(corr):
                    bleed_scores.append(abs(corr))

            if bleed_scores:
                mean_bleed = float(np.mean(bleed_scores))
                stem_forensics['vocal_instrumental_bleed'] = {
                    'available': True,
                    'mean_abs_correlation': round(mean_bleed, 4),
                    'high_bleed': mean_bleed > 0.12,
                }
            else:
                stem_forensics['vocal_instrumental_bleed'] = {
                    'available': False,
                    'reason': 'insufficient_stems',
                }

        if stem_forensics:
            forensics['stem_forensics'] = stem_forensics

    return forensics
807
+
808
+
809
def main():
    """Command-line entry point: analyze one audio file and print JSON.

    Parses the arguments, verifies the dependencies, and prints the
    forensics payload as a single JSON document on stdout. A missing input
    file or any exception raised during processing is reported as a JSON
    error object, followed by exit status 1.
    """
    cli = argparse.ArgumentParser(description='Analyze audio for AI spectral forensics anomalies')
    cli.add_argument('audio_path', help='Path to audio file')
    cli.add_argument('--output', choices=['json'], default='json',
                     help='Output format (default: json)')
    cli.add_argument('--max-length', type=int, default=120,
                     help='Max audio length to analyze in seconds (default: 120)')
    cli.add_argument('--stems-dir',
                     help='Directory containing Demucs stems for stem-aware forensics')
    options = cli.parse_args()

    check_dependencies()

    if not os.path.exists(options.audio_path):
        missing_report = {'error': 'file_not_found', 'message': f'File not found: {options.audio_path}'}
        print(json.dumps(missing_report))
        sys.exit(1)

    try:
        import numpy as np

        class NumpyEncoder(json.JSONEncoder):
            """JSON encoder that converts NumPy scalars to Python values."""

            def default(self, obj):
                if isinstance(obj, (np.bool_, np.generic)):
                    return obj.item()
                return super().default(obj)

        payload = run_forensics(
            options.audio_path,
            max_length_seconds=options.max_length,
            stems_dir=options.stems_dir,
        )
        print(json.dumps(payload, cls=NumpyEncoder))

    except Exception as exc:
        # Top-level boundary: report the failure as JSON instead of a traceback.
        failure_report = {
            'error': 'processing_error',
            'message': str(exc),
            'type': type(exc).__name__,
        }
        print(json.dumps(failure_report))
        sys.exit(1)
850
+
851
+
852
+ if __name__ == '__main__':
853
+ main()
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: orbit-forensics
3
+ Version: 1.0.0
4
+ Summary: High-fidelity AI audio forensics and spectral anomaly detection
5
+ License: ISC
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: ISC License (ISCL)
8
+ Classifier: Operating System :: OS Independent
9
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Analysis
10
+ Requires-Python: >=3.8
11
+ Description-Content-Type: text/markdown
12
+ Requires-Dist: librosa>=0.10.0
13
+ Requires-Dist: numpy>=1.20.0
14
+ Requires-Dist: scipy>=1.8.0
15
+
16
+ # orbit_forensics
17
+
18
+ Standalone Python package for ORBIT FORENSICS analysis.
@@ -0,0 +1,7 @@
1
+ orbit_forensics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ orbit_forensics/audio_forensics.py,sha256=79qo07DIy0JDH2zblsgbzUGWB-GbrjVnpxloXdt9bos,28615
3
+ orbit_forensics-1.0.0.dist-info/METADATA,sha256=_dRCLWFHkPWvVj8zDlUNhpyb0-HAbWUP_Vz7vs_GFp0,584
4
+ orbit_forensics-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
5
+ orbit_forensics-1.0.0.dist-info/entry_points.txt,sha256=YKCN64ZciSa9-PhLB-y51ftxD0BhDuHmHrpQglyerSE,73
6
+ orbit_forensics-1.0.0.dist-info/top_level.txt,sha256=flTUqkfZ6Y3vVw9J1EHih1kZhMzBWDvXGEX2oVB77DI,16
7
+ orbit_forensics-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ orbit-forensics = orbit_forensics.audio_forensics:main
@@ -0,0 +1 @@
1
+ orbit_forensics