@ericdisero/aurora-shared 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,13 +12,15 @@ import { z } from 'zod';
12
12
  import { v4 as uuidv4 } from 'uuid';
13
13
  import { getDbPath, getProjectsDirectory, getUserDataDir } from '../paths.js';
14
14
  import { getMvsepKey, getSunoKey, getKieKey } from '../config.js';
15
- import { createCover, createGeneration, createSoundsGeneration, createWavConversion, downloadTo, getRemainingCredits, host, pollWavConversion, uploadAudioFile } from '../providers/suno.js';
15
+ import { createAddInstrumental, createAddVocals, createCover, createGeneration, createSoundsGeneration, createWavConversion, downloadTo, getRemainingCredits, host, pollWavConversion, uploadAudioFile } from '../providers/suno.js';
16
16
  import { getMvsepUserInfo } from '../providers/mvsep.js';
17
17
  import { createProject, deleteProject, getProject, getProjectDirectory, listProjects, renameProject } from '../storage/projects.js';
18
18
  import { addFileAsset, deleteAsset, getAsset, insertAsset, listAssets, updateAssetPath } from '../storage/assets.js';
19
19
  import { getProjectStems, getStems } from '../storage/stems.js';
20
20
  import { advanceJob, listJobs, loadJob, newJobManifest, saveJob } from '../jobs.js';
21
21
  import { createSplitJobs, prepareSplit } from '../split.js';
22
+ import { prepareExtract } from '../extract.js';
23
+ import { EXTRACT_BUNDLES, EXTRACT_INDIVIDUAL_STEMS, estimateExtractCost } from '../extract-catalog.js';
22
24
  import { probeDurationSeconds, standardizeToWav, convertToMp3, pitchShift } from '../audio/ffmpeg.js';
23
25
  import { runRipMidi, runRvcUpscale } from '../sidecars.js';
24
26
  import { addLane, exportStackBundle, laneNameFromPath, loadStack, removeLane, updateLane } from '../stack.js';
@@ -313,24 +315,82 @@ const fetchWavOp = {
313
315
  }
314
316
  };
315
317
  // ── Generation (long ops — background-capable) ──────────────────
318
// Shared knob schemas — the schema IS the agent's manual (param-table contract:
// docs/suno-param-surface.md, verified vs live docs 2026-06-10).

/** Build an optional numeric knob constrained to 0..1 that carries `description`. */
const unitIntervalKnob = (description) => z.number().min(0).max(1).optional().describe(description);

/** Optional 0..1 knob: adherence to the style text. */
const styleWeightSchema = unitIntervalKnob(
  '0..1 — how hard the output follows the style text. ~0.55-0.75 for layering work'
);

/** Optional 0..1 knob: creative deviation. */
const weirdnessSchema = unitIntervalKnob(
  '0..1 — creative deviation/novelty. Low (0.2-0.4) = predictable takes, high = surprises'
);

/** Optional 0..1 knob: influence of the uploaded audio. */
const audioWeightSchema = unitIntervalKnob(
  '0..1 — input-audio influence on audio-conditioned ops. 0.7-0.85 locks tempo/harmony to the upload'
);

/** Optional persona / voice identifier (free-form string). */
const personaIdSchema = z
  .string()
  .optional()
  .describe('Persona id or Suno Voice voiceId — keeps a consistent vocal character across generations (custom mode only)');

/** Optional persona kind; only the two listed literals are accepted. */
const personaModelSchema = z
  .enum(['style_persona', 'voice_persona'])
  .optional()
  .describe('style_persona (default) | voice_persona (set when personaId is a Suno Voice voiceId, V5/V5_5 only)');
316
346
  const generateOp = {
317
347
  id: 'aurora_generate',
318
348
  description: 'Generate a full music track via Suno (2 variations land as project assets, MP3 + WAV-upgradeable). ' +
319
- 'Takes 1-3 minutes. PAID (Suno credits) check aurora_get_credits first. ' +
349
+ 'ULTRA-CUSTOM by default: in custom mode (style + title set) the prompt is the EXACT sung lyrics — ' +
350
+ 'write real lyrics with section metatags like [Verse]/[Chorus]/[Choir]. Takes 1-3 minutes. ' +
351
+ 'PAID (Suno credits) — check aurora_get_credits first. ' +
320
352
  BACKGROUND_DESCRIBE,
321
353
  input: z.object({
322
- prompt: z.string().describe('Lyrics in custom mode (style/title set), else a track description'),
323
- style: z.string().optional().describe('Music style (implies custom mode)'),
324
- title: z.string().optional().describe('Track title (implies custom mode)'),
354
+ prompt: z
355
+ .string()
356
+ .describe('Custom mode: the EXACT lyrics sung verbatim (≤5000 chars on V4_5+; supports [Verse]/[Chorus]/[Choir]/[Instrumental] metatags; ignored when instrumental). ' +
357
+ 'Non-custom mode: a ≤500-char track description — Suno writes its own lyrics'),
358
+ customMode: z
359
+ .boolean()
360
+ .optional()
361
+ .describe('true = full control (style + title REQUIRED, prompt = literal lyrics). false = description-only mode. ' +
362
+ 'Default: true when style or title is set. Prefer custom mode — it is the whole point of this surface'),
363
+ style: z
364
+ .string()
365
+ .optional()
366
+ .describe('Music style text (≤1000 chars on V4_5+). Required in custom mode'),
367
+ title: z.string().optional().describe('Track title (≤100 chars). Required in custom mode'),
325
368
  instrumental: z.boolean().optional().describe('Generate without vocals (default false)'),
326
- model: z.string().optional().describe(`Suno model id (default ${DEFAULT_GEN_MODEL})`),
369
+ model: z
370
+ .string()
371
+ .optional()
372
+ .describe(`V4 | V4_5 | V4_5PLUS | V4_5ALL | V5 | V5_5 (default ${DEFAULT_GEN_MODEL}; dots normalized)`),
327
373
  vocalGender: z.enum(['male', 'female']).optional(),
328
- negativeTags: z.string().optional().describe('Styles to exclude, ONE comma-separated string'),
374
+ negativeTags: z
375
+ .string()
376
+ .optional()
377
+ .describe('Styles/instruments to exclude, ONE comma-separated string, e.g. "drums, percussion, orchestra". More reliable than "no X" in the style text'),
378
+ styleWeight: styleWeightSchema,
379
+ weirdnessConstraint: weirdnessSchema,
380
+ audioWeight: audioWeightSchema,
381
+ personaId: personaIdSchema,
382
+ personaModel: personaModelSchema,
329
383
  projectId: z.string().optional().describe('Target project (auto-created from the title/prompt when omitted)'),
330
384
  background: z.boolean().optional().describe('Return a jobId immediately instead of waiting')
331
385
  }),
332
386
  async run(input) {
333
- const customMode = Boolean(input.style || input.title);
387
+ const customMode = input.customMode ?? Boolean(input.style || input.title);
388
+ if (customMode && (!input.style || !input.title)) {
389
+ throw new Error('Custom mode requires BOTH style and title (lyrics go in prompt).');
390
+ }
391
+ if (!customMode && input.prompt.length > 500) {
392
+ throw new Error('Non-custom prompts cap at 500 chars (it is a description, not lyrics). For literal lyrics set customMode true + style + title.');
393
+ }
334
394
  const model = (input.model ?? DEFAULT_GEN_MODEL).replace(/\./g, '_');
335
395
  const baseName = input.title?.trim() || input.prompt.slice(0, 60).trim() || 'Generated track';
336
396
  const projectId = await resolveProjectOrCreate(input.projectId, baseName);
@@ -342,16 +402,27 @@ const generateOp = {
342
402
  customMode,
343
403
  model,
344
404
  vocalGender: input.vocalGender,
345
- negativeTags: input.negativeTags
405
+ negativeTags: input.negativeTags,
406
+ styleWeight: input.styleWeight,
407
+ weirdnessConstraint: input.weirdnessConstraint,
408
+ audioWeight: input.audioWeight,
409
+ personaId: input.personaId,
410
+ personaModel: input.personaModel
346
411
  });
347
412
  const manifest = newJobManifest('generate', `gen-${uuidv4().slice(0, 8)}`, projectId, baseName, {
348
413
  prompt: input.prompt,
414
+ customMode,
349
415
  style: input.style,
350
416
  title: input.title,
351
417
  instrumental: input.instrumental ?? false,
352
418
  model,
353
419
  vocalGender: input.vocalGender ?? null,
354
- negativeTags: input.negativeTags
420
+ negativeTags: input.negativeTags,
421
+ styleWeight: input.styleWeight,
422
+ weirdnessConstraint: input.weirdnessConstraint,
423
+ audioWeight: input.audioWeight,
424
+ personaId: input.personaId,
425
+ personaModel: input.personaModel
355
426
  }, { taskId });
356
427
  await saveJob(manifest);
357
428
  if (input.background) {
@@ -368,9 +439,10 @@ const soundsOp = {
368
439
  BACKGROUND_DESCRIBE,
369
440
  input: z.object({
370
441
  prompt: z.string().max(500).describe('Sound description, e.g. "huge cinematic braam, dark low brass"'),
371
- soundKey: z.string().optional().describe('Pitch lock, e.g. C, Cm, F#, F#m (default Any)'),
442
+ soundKey: z.string().optional().describe('Pitch lock: C..B major or Cm..Bm minor, sharps as C# (default Any)'),
372
443
  tempo: z.number().int().min(1).max(300).optional().describe('BPM lock; omit for auto'),
373
444
  loop: z.boolean().optional().describe('Generate as a loopable sound'),
445
+ grabLyrics: z.boolean().optional().describe('Also capture lyric subtitles when the sound has vocals'),
374
446
  projectId: z.string().optional(),
375
447
  background: z.boolean().optional()
376
448
  }),
@@ -381,7 +453,8 @@ const soundsOp = {
381
453
  prompt: input.prompt,
382
454
  soundKey: input.soundKey,
383
455
  soundTempo: input.tempo,
384
- soundLoop: input.loop
456
+ soundLoop: input.loop,
457
+ grabLyrics: input.grabLyrics
385
458
  });
386
459
  const manifest = newJobManifest('sounds', `snd-${uuidv4().slice(0, 8)}`, projectId, baseName, {
387
460
  prompt: input.prompt,
@@ -389,7 +462,8 @@ const soundsOp = {
389
462
  model: 'V5',
390
463
  soundKey: input.soundKey,
391
464
  soundTempo: input.tempo,
392
- soundLoop: input.loop ?? false
465
+ soundLoop: input.loop ?? false,
466
+ grabLyrics: input.grabLyrics ?? false
393
467
  }, { taskId });
394
468
  await saveJob(manifest);
395
469
  if (input.background) {
@@ -399,23 +473,73 @@ const soundsOp = {
399
473
  return ok(jobSummary(finished), jobText(finished));
400
474
  }
401
475
  };
476
/**
 * Upload a local audio file through the provider File Upload API.
 *
 * `.wav` and `.mp3` sources are uploaded as they are. Any other extension
 * (AIFF/FLAC — undocumented containers) is first standardized to a temporary
 * WAV in the OS temp directory. That temp file is disposable the moment the
 * upload returns, so it is removed on every path, success or failure (the leak
 * here was a fresh-eyes review finding).
 * Shared by cover / add-vocals / add-instrumental.
 *
 * @param {string} sourcePath Local path of the audio file to upload.
 * @returns {Promise<*>} Whatever `uploadAudioFile` resolves to (callers pass it
 *   on as `uploadUrl`).
 * @throws Propagates any error from `standardizeToWav` or `uploadAudioFile`.
 */
async function uploadSourceAudio(sourcePath) {
  let tempUpload = null;
  try {
    let uploadSource = sourcePath;
    const ext = extname(sourcePath).toLowerCase();
    if (ext !== '.wav' && ext !== '.mp3') {
      const { tmpdir } = await import('node:os');
      // A timestamp alone is not unique: two conversions that start in the same
      // millisecond would share one path, overwrite each other's WAV, and one
      // call's cleanup would delete the file the other is still uploading.
      // The random suffix keeps concurrent uploads apart.
      tempUpload = join(tmpdir(), `aurora-upload-${Date.now()}-${uuidv4().slice(0, 8)}.wav`);
      await standardizeToWav(sourcePath, tempUpload);
      uploadSource = tempUpload;
    }
    return await uploadAudioFile(uploadSource);
  } finally {
    if (tempUpload) {
      const { rm } = await import('node:fs/promises');
      // Best-effort cleanup: a failed delete must not mask the upload outcome.
      await rm(tempUpload, { force: true }).catch(() => { });
    }
  }
}
500
/**
 * Resolve a layering-op source to a local file path (asset or external).
 *
 * When `input.sourceAssetId` is set it must resolve through `getAsset`, and the
 * asset's `path` then takes precedence over `input.sourcePath`.
 *
 * @param {{sourceAssetId?: string, sourcePath?: string}} input
 * @returns {{sourcePath: string, sourceAsset: (object|null)}} The usable path
 *   and the asset it came from (`null` for an external file).
 * @throws {Error} If the asset id is unknown, or if no path is available or the
 *   file does not exist on disk.
 */
function resolveSourcePath(input) {
  const { sourceAssetId } = input;
  let sourceAsset = null;
  if (sourceAssetId) {
    sourceAsset = getAsset(sourceAssetId);
    if (!sourceAsset) {
      throw new Error(`Asset not found: ${sourceAssetId}`);
    }
  }

  const sourcePath = sourceAsset?.path ?? input.sourcePath;
  const isUsable = Boolean(sourcePath) && existsSync(sourcePath);
  if (!isUsable) {
    throw new Error('Source not found — pass sourceAssetId (a project asset) or sourcePath (a file).');
  }
  return { sourcePath, sourceAsset };
}
402
511
  const coverOp = {
403
512
  id: 'aurora_cover',
404
513
  description: 'Cover a track (Suno upload-and-cover style transform): same musical content, new style. Source is a ' +
405
- 'project asset or an external file (max 8 minutes). 2 variations land as cover assets linked to the ' +
406
- 'source. PAID (~12 Suno credits + ~0.4/WAV) check aurora_get_credits first. ' +
514
+ 'project asset or an external file (max 8 minutes). COVERS RE-RENDER EVERYTHING in the reference to ' +
515
+ 'generate one complementary layer (e.g. a choir part), feed a stripped stem or bare melody render of ONLY ' +
516
+ 'the line to perform, NOT the full mix (full mix in = a choir performing your drums). Layering settings ' +
517
+ 'that lock structure while swapping timbre: audioWeight 0.7-0.85, styleWeight 0.55-0.75, ' +
518
+ 'weirdnessConstraint 0.2-0.4. 2 variations land as cover assets linked to the source. ' +
519
+ 'PAID (~12 Suno credits + ~0.4/WAV) — check aurora_get_credits first. ' +
407
520
  BACKGROUND_DESCRIBE,
408
521
  input: z.object({
409
522
  sourceAssetId: z.string().optional().describe('Project asset to transform'),
410
523
  sourcePath: z.string().optional().describe('OR an external audio file path'),
411
- prompt: z.string().describe('What the cover should sound like'),
412
- style: z.string().optional().describe('Target style (custom mode needs BOTH style and title)'),
524
+ prompt: z.string().describe('Custom mode: exact lyrics. Non-custom: what the cover should sound like (≤500 chars)'),
525
+ customMode: z
526
+ .boolean()
527
+ .optional()
528
+ .describe('true = style + title required, prompt = literal lyrics. Default: true when style or title is set'),
529
+ style: z.string().optional().describe('Target style (custom mode needs BOTH style and title; ≤1000 chars on V4_5+)'),
413
530
  title: z.string().optional(),
414
531
  instrumental: z.boolean().optional(),
415
- model: z.string().optional().describe(`Suno model id (default ${DEFAULT_GEN_MODEL})`),
532
+ model: z
533
+ .string()
534
+ .optional()
535
+ .describe(`V4 | V4_5 | V4_5PLUS | V4_5ALL | V5 | V5_5 (default ${DEFAULT_GEN_MODEL}; V4_5ALL caps input at 1 min)`),
416
536
  vocalGender: z.enum(['male', 'female']).optional(),
417
- negativeTags: z.string().optional().describe('Styles to exclude, ONE comma-separated string'),
418
- audioWeight: z.number().min(0).max(1).optional().describe('0..1 — 0 = new style dominates, 1 = stay close to the source'),
537
+ negativeTags: z.string().optional().describe('Styles/instruments to exclude, ONE comma-separated string'),
538
+ audioWeight: z.number().min(0).max(1).optional().describe('0..1 — 0 = new style dominates, 1 = stay close to the source. 0.7-0.85 = structure locked, timbre swapped'),
539
+ styleWeight: styleWeightSchema,
540
+ weirdnessConstraint: weirdnessSchema,
541
+ personaId: personaIdSchema,
542
+ personaModel: personaModelSchema,
419
543
  projectId: z.string().optional(),
420
544
  background: z.boolean().optional(),
421
545
  fetchWav: z
@@ -431,7 +555,7 @@ const coverOp = {
431
555
  if (!sourcePath || !existsSync(sourcePath)) {
432
556
  throw new Error('Cover source not found — pass sourceAssetId (a project asset) or sourcePath (a file).');
433
557
  }
434
- const customMode = Boolean(input.style || input.title);
558
+ const customMode = input.customMode ?? Boolean(input.style || input.title);
435
559
  if (customMode && (!input.style || !input.title)) {
436
560
  throw new Error('Custom mode needs BOTH a style and a title (you set only one).');
437
561
  }
@@ -448,28 +572,7 @@ const coverOp = {
448
572
  throw new Error(`Project not found: ${input.projectId}`);
449
573
  })())
450
574
  : (sourceAsset?.projectId ?? (await createProject(baseName)).id);
451
- // AIFF/FLAC standardized WAV for upload (undocumented containers). The
452
- // temp file is disposable the moment the upload returns — clean it on every
453
- // path (the leak here was a fresh-eyes review finding).
454
- let uploadUrl;
455
- let tempUpload = null;
456
- try {
457
- let uploadSource = sourcePath;
458
- const ext = extname(sourcePath).toLowerCase();
459
- if (ext !== '.wav' && ext !== '.mp3') {
460
- const { tmpdir } = await import('node:os');
461
- tempUpload = join(tmpdir(), `aurora-cover-upload-${Date.now()}.wav`);
462
- await standardizeToWav(sourcePath, tempUpload);
463
- uploadSource = tempUpload;
464
- }
465
- uploadUrl = await uploadAudioFile(uploadSource);
466
- }
467
- finally {
468
- if (tempUpload) {
469
- const { rm } = await import('node:fs/promises');
470
- await rm(tempUpload, { force: true }).catch(() => { });
471
- }
472
- }
575
+ const uploadUrl = await uploadSourceAudio(sourcePath);
473
576
  const taskId = await createCover({
474
577
  uploadUrl,
475
578
  prompt: input.prompt,
@@ -480,17 +583,26 @@ const coverOp = {
480
583
  model,
481
584
  vocalGender: input.vocalGender,
482
585
  negativeTags: input.negativeTags,
483
- audioWeight: input.audioWeight
586
+ audioWeight: input.audioWeight,
587
+ styleWeight: input.styleWeight,
588
+ weirdnessConstraint: input.weirdnessConstraint,
589
+ personaId: input.personaId,
590
+ personaModel: input.personaModel
484
591
  });
485
592
  const manifest = newJobManifest('cover', `cov-${uuidv4().slice(0, 8)}`, projectId, baseName, {
486
593
  prompt: input.prompt,
594
+ customMode,
487
595
  style: input.style,
488
596
  title: input.title,
489
597
  instrumental: input.instrumental ?? false,
490
598
  model,
491
599
  vocalGender: input.vocalGender ?? null,
492
600
  negativeTags: input.negativeTags,
493
- audioWeight: input.audioWeight
601
+ audioWeight: input.audioWeight,
602
+ styleWeight: input.styleWeight,
603
+ weirdnessConstraint: input.weirdnessConstraint,
604
+ personaId: input.personaId,
605
+ personaModel: input.personaModel
494
606
  }, { taskId, sourceAssetId: sourceAsset?.id ?? null });
495
607
  await saveJob(manifest);
496
608
  if (input.background) {
@@ -516,6 +628,178 @@ const coverOp = {
516
628
  return ok(summary, `${jobText(finished)}${wavNotes.length > 0 ? ` WAV stage: ${wavNotes.join('; ')}` : ''}`);
517
629
  }
518
630
  };
631
/**
 * Operation `aurora_add_vocals`: upload a source track and request vocals
 * layered on top of it via `createAddVocals`.
 *
 * `run` flow: resolve the source file, pick the target project (an explicit
 * `projectId` must exist; otherwise the source asset's project; otherwise a new
 * project named after the title), upload the audio, create the provider task,
 * persist a job manifest, then either return immediately (`background`) or wait
 * for the job and — unless `fetchWav` is false — fetch a WAV per produced asset.
 * A WAV failure is recorded as a note and never fails the op.
 */
const addVocalsOp = {
  id: 'aurora_add_vocals',
  description:
    'Layer AI vocals ON TOP of an instrumental (Suno add-vocals): upload a track, get vocals performed ' +
    'against its tempo/key/changes. THE op for adding a choir or vocal part to an existing production: ' +
    'feed a SIMPLIFIED bounce (harmonic skeleton + the melody to relate to — strip drums/dense ornament), ' +
    'audioWeight 0.7-0.85, choir-steering style + negativeTags, then aurora_split the result and keep ONLY ' +
    'the vocals stem to lay over the real production. Output is a full mix; the vocal stem is the deliverable. ' +
    'PAID (Suno credits) — check aurora_get_credits first. ' +
    BACKGROUND_DESCRIBE,
  input: z.object({
    sourceAssetId: z.string().optional().describe('Project asset to sing over'),
    sourcePath: z.string().optional().describe('OR an external audio file path'),
    prompt: z.string().describe(
      'Vocal content + direction — lyrics or syllables (e.g. Latin chant for choir) with [Choir]/[Harmony] metatags'
    ),
    style: z.string().describe(
      'Vocal approach, e.g. "epic film choir, massed choral harmonies, latin chant" (this is what steers choir vs lead singer)'
    ),
    title: z.string().max(100).describe('Track title (≤100 chars)'),
    negativeTags: z.string().describe(
      'Vocal styles to exclude, ONE comma-separated string, e.g. "lead singer, pop vocal, rap, spoken word, autotune"'
    ),
    vocalGender: z.enum(['male', 'female']).optional(),
    styleWeight: styleWeightSchema,
    weirdnessConstraint: weirdnessSchema,
    audioWeight: audioWeightSchema,
    model: z.string().optional().describe('V4_5PLUS (default) | V5 | V5_5 — this endpoint supports only these'),
    projectId: z.string().optional(),
    background: z.boolean().optional(),
    fetchWav: z.boolean().optional().describe('Blocking mode only: also fetch the provider WAV per variation (default true)')
  }),
  async run(input) {
    const { prompt, style, title, negativeTags, vocalGender, styleWeight, weirdnessConstraint, audioWeight } = input;
    const { sourcePath, sourceAsset } = resolveSourcePath(input);
    const baseName = title.trim() || `${basename(sourcePath, extname(sourcePath))} vocals`;

    // Explicit project must exist; otherwise inherit from the source asset or create one.
    let projectId;
    if (input.projectId) {
      projectId = getProject(input.projectId)?.id;
      if (projectId == null) {
        throw new Error(`Project not found: ${input.projectId}`);
      }
    } else {
      projectId = sourceAsset?.projectId ?? (await createProject(baseName)).id;
    }

    const uploadUrl = await uploadSourceAudio(sourcePath);
    // Normalised once so the provider request and the manifest record the same id.
    const model = (input.model ?? 'V4_5PLUS').replace(/\./g, '_');
    const taskId = await createAddVocals({
      uploadUrl,
      prompt,
      style,
      title,
      negativeTags,
      vocalGender,
      styleWeight,
      weirdnessConstraint,
      audioWeight,
      model
    });

    const jobParams = {
      op: 'add_vocals',
      prompt,
      style,
      title,
      negativeTags,
      vocalGender: vocalGender ?? null,
      styleWeight,
      weirdnessConstraint,
      audioWeight,
      model,
      instrumental: false
    };
    const providerState = { taskId, sourceAssetId: sourceAsset?.id ?? null };
    const manifest = newJobManifest('add_vocals', `avo-${uuidv4().slice(0, 8)}`, projectId, baseName, jobParams, providerState);
    await saveJob(manifest);
    if (input.background) {
      return ok(jobSummary(manifest), jobText(manifest));
    }

    const finished = await awaitJob(manifest);
    const succeeded = finished.status === 'done';
    const wavNotes = [];
    if (succeeded && (input.fetchWav ?? true)) {
      // Sequential on purpose: one WAV conversion at a time, each failure isolated.
      for (const assetId of finished.assetIds) {
        try {
          await fetchWavOp.run({ assetId });
          wavNotes.push(`${assetId}: WAV fetched`);
        } catch (err) {
          wavNotes.push(`${assetId}: WAV failed (${err instanceof Error ? err.message : err}) — MP3 kept; retry with aurora_fetch_wav`);
        }
      }
    }

    const summary = jobSummary(finished);
    const hasWavNotes = wavNotes.length > 0;
    if (hasWavNotes) {
      summary.wavStage = wavNotes;
    }
    const wavSuffix = hasWavNotes ? ` WAV stage: ${wavNotes.join('; ')}` : '';
    const nextHint = succeeded ? ' Next for layering: aurora_split the result and keep the vocals stem.' : '';
    return ok(summary, `${jobText(finished)}${wavSuffix}${nextHint}`);
  }
};
723
/**
 * Operation `aurora_add_instrumental`: upload a source track and request
 * complementary backing instrumentation via `createAddInstrumental`.
 *
 * `run` flow: resolve the source file, pick the target project (an explicit
 * `projectId` must exist; otherwise the source asset's project; otherwise a new
 * project named after the title), upload the audio, create the provider task,
 * persist a job manifest, then either return immediately (`background`) or wait
 * for the job and — unless `fetchWav` is false — fetch a WAV per produced asset.
 * A WAV failure is recorded as a note and never fails the op.
 */
const addInstrumentalOp = {
  id: 'aurora_add_instrumental',
  description:
    'Generate backing instrumentation complementary to an uploaded audio (Suno add-instrumental — the ' +
    'inverse of aurora_add_vocals; input is usually a vocal or a melodic stem). Output is a full mix ' +
    'conditioned on the upload; split it to extract the new layers. PAID (Suno credits) — check ' +
    'aurora_get_credits first. ' +
    BACKGROUND_DESCRIBE,
  input: z.object({
    sourceAssetId: z.string().optional().describe('Project asset to build instrumentation around'),
    sourcePath: z.string().optional().describe('OR an external audio file path'),
    title: z.string().max(100).describe('Track title (≤100 chars)'),
    tags: z.string().describe(
      'Desired instrumental style/mood/instruments (this endpoint names the field tags, comma-separated)'
    ),
    negativeTags: z.string().describe('Styles/instruments to exclude, ONE comma-separated string'),
    vocalGender: z.enum(['male', 'female']).optional(),
    styleWeight: styleWeightSchema,
    weirdnessConstraint: weirdnessSchema,
    audioWeight: audioWeightSchema,
    model: z.string().optional().describe('V4_5PLUS (default) | V5 | V5_5 — this endpoint supports only these'),
    projectId: z.string().optional(),
    background: z.boolean().optional(),
    fetchWav: z.boolean().optional().describe('Blocking mode only: also fetch the provider WAV per variation (default true)')
  }),
  async run(input) {
    const { title, tags, negativeTags, vocalGender, styleWeight, weirdnessConstraint, audioWeight } = input;
    const { sourcePath, sourceAsset } = resolveSourcePath(input);
    const baseName = title.trim() || `${basename(sourcePath, extname(sourcePath))} instrumental`;

    // Explicit project must exist; otherwise inherit from the source asset or create one.
    let projectId;
    if (input.projectId) {
      projectId = getProject(input.projectId)?.id;
      if (projectId == null) {
        throw new Error(`Project not found: ${input.projectId}`);
      }
    } else {
      projectId = sourceAsset?.projectId ?? (await createProject(baseName)).id;
    }

    const uploadUrl = await uploadSourceAudio(sourcePath);
    // Normalised once so the provider request and the manifest record the same id.
    const model = (input.model ?? 'V4_5PLUS').replace(/\./g, '_');
    const taskId = await createAddInstrumental({
      uploadUrl,
      title,
      tags,
      negativeTags,
      vocalGender,
      styleWeight,
      weirdnessConstraint,
      audioWeight,
      model
    });

    const jobParams = {
      op: 'add_instrumental',
      title,
      tags,
      negativeTags,
      vocalGender: vocalGender ?? null,
      styleWeight,
      weirdnessConstraint,
      audioWeight,
      model,
      instrumental: true
    };
    const providerState = { taskId, sourceAssetId: sourceAsset?.id ?? null };
    const manifest = newJobManifest('add_instrumental', `ain-${uuidv4().slice(0, 8)}`, projectId, baseName, jobParams, providerState);
    await saveJob(manifest);
    if (input.background) {
      return ok(jobSummary(manifest), jobText(manifest));
    }

    const finished = await awaitJob(manifest);
    const wavNotes = [];
    if (finished.status === 'done' && (input.fetchWav ?? true)) {
      // Sequential on purpose: one WAV conversion at a time, each failure isolated.
      for (const assetId of finished.assetIds) {
        try {
          await fetchWavOp.run({ assetId });
          wavNotes.push(`${assetId}: WAV fetched`);
        } catch (err) {
          wavNotes.push(`${assetId}: WAV failed (${err instanceof Error ? err.message : err}) — MP3 kept; retry with aurora_fetch_wav`);
        }
      }
    }

    const summary = jobSummary(finished);
    const hasWavNotes = wavNotes.length > 0;
    if (hasWavNotes) {
      summary.wavStage = wavNotes;
    }
    const wavSuffix = hasWavNotes ? ` WAV stage: ${wavNotes.join('; ')}` : '';
    return ok(summary, `${jobText(finished)}${wavSuffix}`);
  }
};
519
803
  const splitOp = {
520
804
  id: 'aurora_split',
521
805
  description: 'Split ANY project asset into 7 stems (vocals, kick, snare, toms, hats, bass, everything-else) via ' +
@@ -545,6 +829,96 @@ const splitOp = {
545
829
  return ok(jobSummary(finished), jobText(finished));
546
830
  }
547
831
  };
832
/** Agent-facing description of the `vocalMode` parameter of `aurora_extract`. */
const VOCAL_MODE_DESCRIBE = "lead_back = lead + backing vocals; male_female = male + female voices. Vocal stems come from the mode, never from the stems array";

/**
 * Operation `aurora_extract`: per-instrument extraction from a project asset.
 *
 * `run` flow:
 *  1. Build the selection (stem ids de-duplicated, vocal mode, reverb flag).
 *  2. Reject ids that are in neither the individual-stem catalog nor the
 *     drumsep / lead_rhythm_guitar bundles.
 *  3. Look up the asset, probe its duration and compute a cost estimate
 *     (a 60-second duration is assumed when the probe yields nothing).
 *  4. `estimateOnly` returns the plan here, before `prepareExtract` is called.
 *  5. Otherwise prepare the extraction, persist a job manifest and either
 *     return immediately (`background`) or wait for the job to finish.
 *
 * @throws {Error} On unknown stem ids or an unknown asset id; errors from
 *   `prepareExtract` / `awaitJob` propagate.
 */
const extractOp = {
  id: 'aurora_extract',
  description: 'The Sample Extractor: pull SPECIFIC instruments out of ANY asset via the per-instrument MVSEP ' +
    'catalog (~35 instruments + bundles). Everything Else is ALWAYS included free (local phase-cancel), ' +
    'so the parts sum back to the original. VARIABLE PAID COST: one MVSEP call per individual stem, but ' +
    'bundles count ONCE however many members you pick (drum kit = 6 stems for 1 call; lead+rhythm guitar ' +
    '= 1 call; vocal modes = 1 call; dereverb = 1 call). Call with estimateOnly=true FIRST to see the ' +
    'exact call plan before spending. 12-minute input cap. Results land in <project>/extracts/ + the ' +
    'extraction_stems table; detected musical key rides every row. Takes minutes per call (sequential). ' +
    BACKGROUND_DESCRIBE,
  input: z.object({
    assetId: z.string().describe('The asset to extract from (any kind)'),
    stems: z
      .array(z.string())
      .optional()
      .describe('Non-vocal catalog stem ids. Bundles: drum_kick/drum_snare/drum_toms/drum_hihats/' +
        'drum_cymbals_crash/drum_cymbals_ride (one call), guitar_lead/guitar_rhythm (one call). ' +
        'Individuals: piano, digital_piano, organ, accordion, harpsichord, saxophone, flute, trumpet, ' +
        'trombone, french_horn, tuba, clarinet, oboe, bassoon, harmonica, guitar_acoustic, ' +
        'guitar_electric, mandolin, banjo, ukulele, harp, sitar, dobro, violin, viola, cello, ' +
        'double_bass, bells, congas, tambourine, marimba, glockenspiel, timpani, triangle, ' +
        'wind_chimes, bass, synth'),
    vocalMode: z.enum(['lead_back', 'male_female']).optional().describe(VOCAL_MODE_DESCRIBE),
    includeReverb: z
      .boolean()
      .optional()
      .describe('Dereverb the vocal first: adds a reverb-tail stem; with a vocalMode the bundle runs on the DRY ' +
        'vocal; alone it delivers dry vocal + reverb tail'),
    estimateOnly: z
      .boolean()
      .optional()
      .describe('Return the call plan + cost estimate WITHOUT spending anything'),
    background: z.boolean().optional().describe('Strongly recommended — sequential calls take minutes each')
  }),
  async run(input) {
    const selection = {
      // De-duplicate (first occurrence wins, order kept) so a repeated id is
      // costed, planned and recorded once. Whether the downstream planner also
      // de-duplicates is not visible from here; doing it at the boundary is safe
      // either way.
      stems: [...new Set(input.stems ?? [])],
      vocalSeparationType: input.vocalMode ?? null,
      includeReverb: input.includeReverb ?? false
    };
    // Validate selection ids early (clear error beats a silent no-op call plan).
    const known = new Set([
      ...Object.keys(EXTRACT_INDIVIDUAL_STEMS),
      ...EXTRACT_BUNDLES.drumsep.stems,
      ...EXTRACT_BUNDLES.lead_rhythm_guitar.stems
    ]);
    const unknown = selection.stems.filter((s) => !known.has(s));
    if (unknown.length > 0) {
      throw new Error(`Unknown stem id(s): ${unknown.join(', ')}. See the stems param description for the catalog.`);
    }
    const asset = getAsset(input.assetId);
    if (!asset) {
      throw new Error(`Asset not found: ${input.assetId}`);
    }
    const duration = await probeDurationSeconds(asset.path);
    // Fall back to 60 seconds for the estimate when the probe returns nothing.
    const estimate = estimateExtractCost(selection, duration ?? 60);
    if (input.estimateOnly) {
      // Dry run: return before prepareExtract so nothing is planned or spent.
      return ok({
        estimate,
        durationSeconds: duration,
        note: 'Nothing spent. Re-run without estimateOnly to fire the plan.'
      }, `Plan: ${estimate.totalCalls} MVSEP call(s) on a ~${estimate.minuteMultiplier}-minute track. ` +
        `Bundles: ${estimate.breakdown.bundles.map((b) => b.bundleId).join(', ') || 'none'}. ` +
        `Individual: ${estimate.breakdown.individualStems.join(', ') || 'none'}. EE included free. Nothing spent.`);
    }
    const { asset: prepared, state } = await prepareExtract(input.assetId, selection);
    const manifest = newJobManifest('extract', `ext-${uuidv4().slice(0, 8)}`, prepared.projectId, prepared.name, {
      assetId: input.assetId,
      stems: selection.stems,
      vocalMode: selection.vocalSeparationType,
      includeReverb: selection.includeReverb,
      plannedCalls: estimate.totalCalls
    }, { assetId: input.assetId, extract: state });
    manifest.stage = `planned ${state.calls.length} MVSEP call(s)`;
    await saveJob(manifest);
    if (input.background) {
      const summary = jobSummary(manifest);
      summary.estimate = estimate;
      return ok(summary, `${jobText(manifest)} Plan: ${estimate.totalCalls} MVSEP call(s).`);
    }
    const finished = await awaitJob(manifest);
    const summary = jobSummary(finished);
    summary.estimate = estimate;
    // Surface the detected key only for a finished job that reported one.
    if (finished.status === 'done' && finished.provider.extract?.detectedKey) {
      summary.detectedKey = finished.provider.extract.detectedKey;
    }
    return ok(summary, jobText(finished));
  }
};
548
922
  const getJobStatusOp = {
549
923
  id: 'aurora_get_job_status',
550
924
  description: 'Poll a background job (generate / sounds / cover / split). Advances the job: downloads and ' +
@@ -831,7 +1205,10 @@ export const ALL_OPERATIONS = [
831
1205
  generateOp,
832
1206
  soundsOp,
833
1207
  coverOp,
1208
+ addVocalsOp,
1209
+ addInstrumentalOp,
834
1210
  splitOp,
1211
+ extractOp,
835
1212
  getJobStatusOp,
836
1213
  listJobsOp,
837
1214
  pitchShiftOp,