npm - varg.ai-sdk - Versions diffs - 0.1.0 → 0.4.0-alpha.1 - Mend

varg.ai-sdk 0.1.0 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (236) hide show

package/.claude/settings.local.json +1 -1
package/.env.example +3 -0
package/.github/workflows/ci.yml +23 -0
package/.husky/README.md +102 -0
package/.husky/commit-msg +6 -0
package/.husky/pre-commit +9 -0
package/.husky/pre-push +6 -0
package/.size-limit.json +8 -0
package/.test-hooks.ts +5 -0
package/CLAUDE.md +10 -3
package/CONTRIBUTING.md +150 -0
package/LICENSE.md +53 -0
package/README.md +56 -209
package/SKILLS.md +26 -10
package/biome.json +7 -1
package/bun.lock +1286 -0
package/commitlint.config.js +22 -0
package/docs/index.html +1130 -0
package/docs/prompting.md +326 -0
package/docs/react.md +834 -0
package/docs/sdk.md +812 -0
package/ffmpeg/CLAUDE.md +68 -0
package/package.json +48 -8
package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
package/pipeline/cookbooks/text-to-tiktok.md +669 -0
package/pipeline/cookbooks/trendwatching.md +156 -0
package/plan.md +281 -0
package/scripts/.gitkeep +0 -0
package/src/ai-sdk/cache.ts +142 -0
package/src/ai-sdk/examples/cached-generation.ts +53 -0
package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
package/src/ai-sdk/examples/duet-video.ts +56 -0
package/src/ai-sdk/examples/editly-composition.ts +63 -0
package/src/ai-sdk/examples/editly-test.ts +57 -0
package/src/ai-sdk/examples/editly-video-test.ts +52 -0
package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
package/src/ai-sdk/examples/music-generation.ts +19 -0
package/src/ai-sdk/examples/openai-sora.ts +34 -0
package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
package/src/ai-sdk/examples/talking-lion.ts +55 -0
package/src/ai-sdk/examples/video-generation.ts +39 -0
package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
package/src/ai-sdk/file-cache.ts +112 -0
package/src/ai-sdk/file.ts +238 -0
package/src/ai-sdk/generate-element.ts +92 -0
package/src/ai-sdk/generate-music.ts +46 -0
package/src/ai-sdk/generate-video.ts +165 -0
package/src/ai-sdk/index.ts +72 -0
package/src/ai-sdk/music-model.ts +110 -0
package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
package/src/ai-sdk/providers/editly/index.ts +817 -0
package/src/ai-sdk/providers/editly/layers.ts +776 -0
package/src/ai-sdk/providers/editly/plan.md +144 -0
package/src/ai-sdk/providers/editly/types.ts +328 -0
package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
package/src/ai-sdk/providers/fal-provider.ts +512 -0
package/src/ai-sdk/providers/higgsfield.ts +379 -0
package/src/ai-sdk/providers/openai.ts +251 -0
package/src/ai-sdk/providers/replicate.ts +16 -0
package/src/ai-sdk/video-model.ts +185 -0
package/src/cli/commands/find.tsx +137 -0
package/src/cli/commands/help.tsx +85 -0
package/src/cli/commands/index.ts +6 -0
package/src/cli/commands/list.tsx +238 -0
package/src/cli/commands/render.tsx +71 -0
package/src/cli/commands/run.tsx +511 -0
package/src/cli/commands/which.tsx +253 -0
package/src/cli/index.ts +114 -0
package/src/cli/quiet.ts +44 -0
package/src/cli/types.ts +32 -0
package/src/cli/ui/components/Badge.tsx +29 -0
package/src/cli/ui/components/DataTable.tsx +51 -0
package/src/cli/ui/components/Header.tsx +23 -0
package/src/cli/ui/components/HelpBlock.tsx +44 -0
package/src/cli/ui/components/KeyValue.tsx +33 -0
package/src/cli/ui/components/OptionRow.tsx +81 -0
package/src/cli/ui/components/Separator.tsx +23 -0
package/src/cli/ui/components/StatusBox.tsx +108 -0
package/src/cli/ui/components/VargBox.tsx +51 -0
package/src/cli/ui/components/VargProgress.tsx +36 -0
package/src/cli/ui/components/VargSpinner.tsx +34 -0
package/src/cli/ui/components/VargText.tsx +56 -0
package/src/cli/ui/components/index.ts +19 -0
package/src/cli/ui/index.ts +12 -0
package/src/cli/ui/render.ts +35 -0
package/src/cli/ui/theme.ts +63 -0
package/src/cli/utils.ts +78 -0
package/src/core/executor/executor.ts +201 -0
package/src/core/executor/index.ts +13 -0
package/src/core/executor/job.ts +214 -0
package/src/core/executor/pipeline.ts +222 -0
package/src/core/index.ts +11 -0
package/src/core/registry/index.ts +9 -0
package/src/core/registry/loader.ts +149 -0
package/src/core/registry/registry.ts +221 -0
package/src/core/registry/resolver.ts +206 -0
package/src/core/schema/helpers.ts +134 -0
package/src/core/schema/index.ts +8 -0
package/src/core/schema/shared.ts +102 -0
package/src/core/schema/types.ts +279 -0
package/src/core/schema/validator.ts +92 -0
package/src/definitions/actions/captions.ts +261 -0
package/src/definitions/actions/edit.ts +298 -0
package/src/definitions/actions/image.ts +125 -0
package/src/definitions/actions/index.ts +114 -0
package/src/definitions/actions/music.ts +205 -0
package/src/definitions/actions/sync.ts +128 -0
package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +63 -90
package/src/definitions/actions/upload.ts +111 -0
package/src/definitions/actions/video.ts +163 -0
package/src/definitions/actions/voice.ts +119 -0
package/src/definitions/index.ts +23 -0
package/src/definitions/models/elevenlabs.ts +50 -0
package/src/definitions/models/flux.ts +56 -0
package/src/definitions/models/index.ts +36 -0
package/src/definitions/models/kling.ts +56 -0
package/src/definitions/models/llama.ts +54 -0
package/src/definitions/models/nano-banana-pro.ts +102 -0
package/src/definitions/models/sonauto.ts +68 -0
package/src/definitions/models/soul.ts +65 -0
package/src/definitions/models/wan.ts +54 -0
package/src/definitions/models/whisper.ts +44 -0
package/src/definitions/skills/index.ts +12 -0
package/src/definitions/skills/talking-character.ts +87 -0
package/src/definitions/skills/text-to-tiktok.ts +97 -0
package/src/index.ts +118 -0
package/src/providers/apify.ts +269 -0
package/src/providers/base.ts +264 -0
package/src/providers/elevenlabs.ts +217 -0
package/src/providers/fal.ts +392 -0
package/src/providers/ffmpeg.ts +544 -0
package/src/providers/fireworks.ts +193 -0
package/src/providers/groq.ts +149 -0
package/src/providers/higgsfield.ts +145 -0
package/src/providers/index.ts +143 -0
package/src/providers/replicate.ts +147 -0
package/src/providers/storage.ts +206 -0
package/src/react/cli.ts +52 -0
package/src/react/elements.ts +146 -0
package/src/react/examples/branching.tsx +66 -0
package/src/react/examples/captions-demo.tsx +37 -0
package/src/react/examples/character-video.tsx +84 -0
package/src/react/examples/grid.tsx +53 -0
package/src/react/examples/layouts-demo.tsx +57 -0
package/src/react/examples/madi.tsx +60 -0
package/src/react/examples/music-test.tsx +35 -0
package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
package/src/react/examples/orange-portrait.tsx +41 -0
package/src/react/examples/split-element-demo.tsx +60 -0
package/src/react/examples/split-layout-demo.tsx +60 -0
package/src/react/examples/split.tsx +41 -0
package/src/react/examples/video-grid.tsx +46 -0
package/src/react/index.ts +43 -0
package/src/react/layouts/grid.tsx +28 -0
package/src/react/layouts/index.ts +2 -0
package/src/react/layouts/split.tsx +20 -0
package/src/react/react.test.ts +309 -0
package/src/react/render.ts +21 -0
package/src/react/renderers/animate.ts +59 -0
package/src/react/renderers/captions.ts +297 -0
package/src/react/renderers/clip.ts +248 -0
package/src/react/renderers/context.ts +17 -0
package/src/react/renderers/image.ts +109 -0
package/src/react/renderers/index.ts +22 -0
package/src/react/renderers/music.ts +60 -0
package/src/react/renderers/packshot.ts +84 -0
package/src/react/renderers/progress.ts +173 -0
package/src/react/renderers/render.ts +243 -0
package/src/react/renderers/slider.ts +69 -0
package/src/react/renderers/speech.ts +53 -0
package/src/react/renderers/split.ts +91 -0
package/src/react/renderers/subtitle.ts +16 -0
package/src/react/renderers/swipe.ts +75 -0
package/src/react/renderers/title.ts +17 -0
package/src/react/renderers/utils.ts +124 -0
package/src/react/renderers/video.ts +127 -0
package/src/react/runtime/jsx-dev-runtime.ts +43 -0
package/src/react/runtime/jsx-runtime.ts +35 -0
package/src/react/types.ts +232 -0
package/src/studio/index.ts +26 -0
package/src/studio/scanner.ts +102 -0
package/src/studio/server.ts +554 -0
package/src/studio/stages.ts +251 -0
package/src/studio/step-renderer.ts +279 -0
package/src/studio/types.ts +60 -0
package/src/studio/ui/cache.html +303 -0
package/src/studio/ui/index.html +1820 -0
package/src/tests/all.test.ts +509 -0
package/src/tests/index.ts +33 -0
package/src/tests/unit.test.ts +403 -0
package/tsconfig.cli.json +8 -0
package/tsconfig.json +21 -3
package/TEST_RESULTS.md +0 -122
package/action/captions/SKILL.md +0 -170
package/action/captions/index.ts +0 -227
package/action/edit/SKILL.md +0 -235
package/action/edit/index.ts +0 -493
package/action/image/SKILL.md +0 -140
package/action/image/index.ts +0 -112
package/action/sync/SKILL.md +0 -136
package/action/sync/index.ts +0 -187
package/action/transcribe/SKILL.md +0 -179
package/action/video/SKILL.md +0 -116
package/action/video/index.ts +0 -135
package/action/voice/SKILL.md +0 -125
package/action/voice/index.ts +0 -201
package/index.ts +0 -38
package/lib/README.md +0 -144
package/lib/ai-sdk/fal.ts +0 -106
package/lib/ai-sdk/replicate.ts +0 -107
package/lib/elevenlabs.ts +0 -382
package/lib/fal.ts +0 -478
package/lib/ffmpeg.ts +0 -467
package/lib/fireworks.ts +0 -235
package/lib/groq.ts +0 -246
package/lib/higgsfield.ts +0 -176
package/lib/remotion/SKILL.md +0 -823
package/lib/remotion/cli.ts +0 -115
package/lib/remotion/functions.ts +0 -283
package/lib/remotion/index.ts +0 -19
package/lib/remotion/templates.ts +0 -73
package/lib/replicate.ts +0 -304
package/output.txt +0 -1
package/test-import.ts +0 -7
package/test-services.ts +0 -97
package/utilities/s3.ts +0 -147

package/src/definitions/actions/index.ts ADDED Viewed

@@ -0,0 +1,114 @@
+/**
+ * Action definitions index
+ */
+export type { AddCaptionsOptions, SubtitleStyle } from "./captions";
+// Captions
+export { addCaptions, definition as captions } from "./captions";
+export type {
+  CutOptions,
+  CutResult,
+  FadeOptions,
+  FadeResult,
+  MergeOptions,
+  MergeResult,
+  RemoveOptions,
+  RemoveResult,
+  SplitOptions,
+  SplitResult,
+  TransitionOptions,
+  TransitionResult,
+  TrimOptions,
+  TrimResult,
+} from "./edit";
+// Video editing (FFmpeg)
+export {
+  cut,
+  cutDefinition,
+  fade,
+  fadeDefinition,
+  merge,
+  mergeDefinition,
+  remove,
+  removeDefinition,
+  split,
+  splitDefinition,
+  transition,
+  transitionDefinition,
+  trim,
+  trimDefinition,
+} from "./edit";
+export type { ImageGenerationResult } from "./image";
+// Image generation
+export {
+  definition as image,
+  generateWithFal,
+  generateWithSoul,
+} from "./image";
+export type { GenerateMusicOptions, MusicResult } from "./music";
+// Music generation
+export { definition as music, generateMusic } from "./music";
+export type { LipsyncOptions, LipsyncResult, Wav2LipOptions } from "./sync";
+// Lip sync
+export {
+  definition as sync,
+  lipsync,
+  lipsyncOverlay,
+  lipsyncWav2Lip,
+} from "./sync";
+export type { TranscribeOptions, TranscribeResult } from "./transcribe";
+// Transcription
+export {
+  definition as transcribe,
+  transcribe as transcribeAudio,
+} from "./transcribe";
+export type { UploadOptions, UploadResult } from "./upload";
+// Upload
+export { definition as uploadDef, upload } from "./upload";
+export type { VideoGenerationResult } from "./video";
+// Video generation
+export {
+  definition as video,
+  generateVideoFromImage,
+  generateVideoFromText,
+} from "./video";
+export type { GenerateVoiceOptions, VoiceResult } from "./voice";
+// Voice generation
+export { definition as voice, generateVoice } from "./voice";
+// All action definitions for auto-loading
+import { definition as captionsDefinition } from "./captions";
+import {
+  cutDefinition,
+  fadeDefinition,
+  mergeDefinition,
+  removeDefinition,
+  splitDefinition,
+  transitionDefinition,
+  trimDefinition,
+} from "./edit";
+import { definition as imageDefinition } from "./image";
+import { definition as musicDefinition } from "./music";
+import { definition as syncDefinition } from "./sync";
+import { definition as transcribeDefinition } from "./transcribe";
+import { definition as uploadDefinition } from "./upload";
+import { definition as videoDefinition } from "./video";
+import { definition as voiceDefinition } from "./voice";
+/**
+ * Every action definition imported above, collected into a single array.
+ * Contains the generation actions (video, image, voice, transcribe, music,
+ * sync, captions), the seven FFmpeg edit actions, and upload.
+ * NOTE(review): presumably iterated by the registry loader for auto-loading;
+ * confirm whether element order matters before reordering.
+ */
+export const allActions = [
+  videoDefinition,
+  imageDefinition,
+  voiceDefinition,
+  transcribeDefinition,
+  musicDefinition,
+  syncDefinition,
+  captionsDefinition,
+  trimDefinition,
+  cutDefinition,
+  mergeDefinition,
+  splitDefinition,
+  fadeDefinition,
+  transitionDefinition,
+  removeDefinition,
+  uploadDefinition,
+];

package/src/definitions/actions/music.ts ADDED Viewed

@@ -0,0 +1,205 @@
+/**
+ * Music generation action
+ * Text-to-music via Fal/Sonauto
+ */
+import { writeFile } from "node:fs/promises";
+import { z } from "zod";
+import { audioFormatSchema, filePathSchema } from "../../core/schema/shared";
+import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
+import { falProvider } from "../../providers/fal";
+import { storageProvider } from "../../providers/storage";
+// How many songs a single request may ask for (1 or 2)
+const songCountSchema = z.union([z.literal(1), z.literal(2)]);
+// One generated audio file as reported in the action output
+const musicAudioFileSchema = z.object({
+  url: z.string(),
+  fileName: z.string(),
+  contentType: z.string(),
+  fileSize: z.number(),
+});
+// What the "music" action accepts
+const musicInputSchema = z.object({
+  prompt: z.string().optional().describe("Description of music to generate"),
+  tags: z
+    .array(z.string())
+    .optional()
+    .describe("Style tags like 'rock', 'energetic'"),
+  lyrics: z.string().optional().describe("Optional lyrics prompt"),
+  format: audioFormatSchema.default("mp3").describe("Output format"),
+  numSongs: songCountSchema.default(1).describe("Number of songs to generate"),
+  output: filePathSchema.optional().describe("Output file path"),
+});
+// What the "music" action produces
+const musicOutputSchema = z.object({
+  seed: z.number(),
+  tags: z.array(z.string()).optional(),
+  lyrics: z.string().optional(),
+  audio: z.array(musicAudioFileSchema),
+  uploadUrls: z.array(z.string()).optional(),
+});
+// Input/output pair handed to the action definition
+const schema: ZodSchema<typeof musicInputSchema, typeof musicOutputSchema> = {
+  input: musicInputSchema,
+  output: musicOutputSchema,
+};
+/**
+ * "music" action: forwards the validated inputs to generateMusic, mapping the
+ * schema's `output` field onto the function's `outputPath` option.
+ */
+export const definition: ActionDefinition<typeof schema> = {
+  type: "action",
+  name: "music",
+  description: "Generate music from text prompt or tags",
+  schema,
+  routes: [],
+  execute: async ({ prompt, tags, lyrics, format, numSongs, output }) =>
+    generateMusic({
+      prompt,
+      tags,
+      lyrics,
+      format,
+      numSongs,
+      outputPath: output,
+    }),
+};
+// Types
+/**
+ * Options accepted by generateMusic.
+ * At least one of `prompt` or `tags` must be supplied; generateMusic throws otherwise.
+ */
+export interface GenerateMusicOptions {
+  /** Text description of the music to generate. */
+  prompt?: string;
+  /** Style tags, forwarded to the provider as-is. */
+  tags?: string[];
+  /** Lyrics text; forwarded to the provider as `lyricsPrompt`. */
+  lyrics?: string;
+  /** Seed forwarded to the provider. */
+  seed?: number;
+  /** Forwarded to the provider; generateMusic defaults it to 2. */
+  promptStrength?: number;
+  /** Forwarded to the provider; generateMusic defaults it to 0.7. */
+  balanceStrength?: number;
+  /** Number of songs to request; generateMusic defaults it to 1. */
+  numSongs?: 1 | 2;
+  /** Output audio format; generateMusic defaults it to "mp3". */
+  format?: "flac" | "mp3" | "wav" | "ogg" | "m4a";
+  /** Forwarded to the provider as `outputBitRate` (presumably kbps; confirm against the provider docs). */
+  bitRate?: 128 | 192 | 256 | 320;
+  /** Tempo, or "auto"; generateMusic defaults it to "auto". */
+  bpm?: number | "auto";
+  /** When true, each generated track is also uploaded through storageProvider; defaults to false. */
+  upload?: boolean;
+  /** Local path to save the generated audio to; nothing is saved when omitted. */
+  outputPath?: string;
+}
+/** Result returned by generateMusic. */
+export interface MusicResult {
+  /** Seed reported by the provider. */
+  seed: number;
+  /** Tags reported by the provider. */
+  tags?: string[];
+  /** Lyrics reported by the provider. */
+  lyrics?: string;
+  /** Generated audio files, mapped from the provider's snake_case fields. */
+  audio: Array<{
+    url: string;
+    fileName: string;
+    contentType: string;
+    fileSize: number;
+  }>;
+  /** Storage URLs, set only when the `upload` option was true. */
+  uploadUrls?: string[];
+}
+/** One audio entry as returned by the provider, before camelCase mapping. */
+interface RawMusicAudio {
+  url: string;
+  file_name: string;
+  content_type: string;
+  file_size: number;
+}
+/** Map one provider audio entry onto the camelCase shape used by MusicResult. */
+function toMusicAudio(raw: RawMusicAudio): MusicResult["audio"][number] {
+  return {
+    url: raw.url,
+    fileName: raw.file_name,
+    contentType: raw.content_type,
+    fileSize: raw.file_size,
+  };
+}
+/**
+ * Build the per-song file path used when several songs share one outputPath.
+ * "song.mp3" becomes "song-1.<ext>"; a path without an extension ("out/song")
+ * gets the suffix appended ("out/song-1.<ext>") so songs never overwrite each other.
+ */
+function numberedOutputPath(
+  outputPath: string,
+  index: number,
+  ext: string,
+): string {
+  const suffix = `-${index}.${ext}`;
+  // Only a dot inside the final path segment, preceded by a name character,
+  // counts as an extension; dots in directory names and dotfiles are left alone.
+  const extensionPattern = /(?<=[^\/\\])\.[^.\/\\]+$/;
+  return extensionPattern.test(outputPath)
+    ? outputPath.replace(extensionPattern, suffix)
+    : `${outputPath}${suffix}`;
+}
+/**
+ * Generate music through falProvider.textToMusic, then optionally save the
+ * resulting audio locally and/or upload it through storageProvider.
+ *
+ * @param options - Generation options; at least one of `prompt` or a
+ *   non-empty `tags` array is required.
+ * @returns Seed, tags, lyrics and audio files reported by the provider, plus
+ *   `uploadUrls` when `options.upload` is true.
+ * @throws Error when neither prompt nor tags is given, when the provider
+ *   returns no audio, or when downloading a generated file fails.
+ */
+export async function generateMusic(
+  options: GenerateMusicOptions,
+): Promise<MusicResult> {
+  const {
+    prompt,
+    tags,
+    lyrics,
+    seed,
+    promptStrength = 2,
+    balanceStrength = 0.7,
+    numSongs = 1,
+    format = "mp3",
+    bitRate,
+    bpm = "auto",
+    upload = false,
+    outputPath,
+  } = options;
+  // An empty tags array carries no information, so it does not satisfy the requirement
+  const hasTags = Array.isArray(tags) && tags.length > 0;
+  if (!prompt && !hasTags) {
+    throw new Error("Either prompt or tags is required");
+  }
+  console.log(`[music] generating ${numSongs} song(s)...`);
+  if (prompt) console.log(`[music] prompt: ${prompt}`);
+  if (hasTags) console.log(`[music] tags: ${tags.join(", ")}`);
+  const result = await falProvider.textToMusic({
+    prompt,
+    tags,
+    lyricsPrompt: lyrics,
+    seed,
+    promptStrength,
+    balanceStrength,
+    numSongs,
+    outputFormat: format,
+    outputBitRate: bitRate,
+    bpm,
+  });
+  // The provider may return either a single audio object or an array of them
+  const rawAudio = result.data.audio;
+  if (!rawAudio) {
+    throw new Error("No audio in result");
+  }
+  const rawAudioList: RawMusicAudio[] = Array.isArray(rawAudio)
+    ? rawAudio
+    : [rawAudio];
+  const musicResult: MusicResult = {
+    seed: result.data.seed,
+    tags: result.data.tags,
+    lyrics: result.data.lyrics,
+    audio: rawAudioList.map(toMusicAudio),
+  };
+  const ext = format || "wav";
+  // Save files locally if requested
+  if (outputPath) {
+    for (const [i, audio] of musicResult.audio.entries()) {
+      const filePath =
+        musicResult.audio.length === 1
+          ? outputPath
+          : numberedOutputPath(outputPath, i + 1, ext);
+      const response = await fetch(audio.url);
+      // Without this check an HTTP error page would be written out as audio
+      if (!response.ok) {
+        throw new Error(
+          `Failed to download audio from ${audio.url}: ${response.status} ${response.statusText}`,
+        );
+      }
+      const buffer = await response.arrayBuffer();
+      await writeFile(filePath, Buffer.from(buffer));
+      console.log(`[music] saved to ${filePath}`);
+    }
+  }
+  // Upload to storage if requested
+  if (upload) {
+    const uploadUrls: string[] = [];
+    for (const [i, audio] of musicResult.audio.entries()) {
+      // The index suffix keeps keys distinct when both uploads land in the same millisecond
+      const objectKey = `music/${Date.now()}-${i + 1}.${ext}`;
+      const uploadUrl = await storageProvider.uploadFromUrl(
+        audio.url,
+        objectKey,
+      );
+      uploadUrls.push(uploadUrl);
+      console.log(`[music] uploaded to ${uploadUrl}`);
+    }
+    musicResult.uploadUrls = uploadUrls;
+  }
+  return musicResult;
+}
+export default definition;

package/src/definitions/actions/sync.ts ADDED Viewed

@@ -0,0 +1,128 @@
+/**
+ * Lip sync action
+ * Audio-to-video synchronization
+ */
+import { z } from "zod";
+import {
+  filePathSchema,
+  resolutionSchema,
+  videoDurationStringSchema,
+} from "../../core/schema/shared";
+import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
+import { falProvider } from "../../providers/fal";
+import { ffmpegProvider } from "../../providers/ffmpeg";
+// What the "sync" action accepts
+const syncInputSchema = z.object({
+  image: filePathSchema.describe("Input image"),
+  audio: filePathSchema.describe("Audio file"),
+  prompt: z.string().describe("Description of the scene"),
+  duration: videoDurationStringSchema.default("5").describe("Output duration"),
+  resolution: resolutionSchema.default("480p").describe("Output resolution"),
+});
+// What the "sync" action produces
+const syncOutputSchema = z.object({ videoUrl: z.string() });
+// Input/output pair handed to the action definition
+const schema: ZodSchema<typeof syncInputSchema, typeof syncOutputSchema> = {
+  input: syncInputSchema,
+  output: syncOutputSchema,
+};
+/**
+ * "sync" action: passes the five validated input fields straight to lipsync.
+ */
+export const definition: ActionDefinition<typeof schema> = {
+  type: "action",
+  name: "sync",
+  description: "Lip sync audio to video/image",
+  schema,
+  routes: [],
+  execute: async ({ image, audio, prompt, duration, resolution }) =>
+    lipsync({ image, audio, prompt, duration, resolution }),
+};
+// Types
+/** Options accepted by lipsync. */
+export interface LipsyncOptions {
+  /** Input image; forwarded to the provider as `imageUrl`. */
+  image: string;
+  /** Audio to sync to; forwarded to the provider as `audioUrl`. */
+  audio: string;
+  /** Scene description forwarded to the provider. */
+  prompt: string;
+  /** Output duration; lipsync defaults it to "5" (presumably seconds; confirm against the provider docs). */
+  duration?: "5" | "10";
+  /** Output resolution; lipsync defaults it to "480p". */
+  resolution?: "480p" | "720p" | "1080p";
+}
+/** Result returned by lipsync. */
+export interface LipsyncResult {
+  /** URL of the generated video as reported by the provider. */
+  videoUrl: string;
+}
+/** Options accepted by lipsyncWav2Lip. */
+export interface Wav2LipOptions {
+  /** Source video; passed to ffmpegProvider.convertFormat as the input. */
+  videoPath: string;
+  /** Audio to sync; NOTE(review): not read by the current placeholder implementation. */
+  audioPath: string;
+  /** Destination path; also the value lipsyncWav2Lip returns. */
+  outputPath: string;
+}
+/**
+ * Produce a lip-synced video by calling falProvider.wan25.
+ *
+ * @param options - Image, audio and prompt, plus optional duration
+ *   (default "5") and resolution (default "480p").
+ * @returns The URL of the generated video.
+ * @throws Error when the provider response carries no video URL.
+ */
+export async function lipsync(options: LipsyncOptions): Promise<LipsyncResult> {
+  const {
+    image: imageUrl,
+    audio: audioUrl,
+    prompt,
+    duration = "5",
+    resolution = "480p",
+  } = options;
+  console.log("[sync] generating lip-synced video with wan-25...");
+  const response = await falProvider.wan25({
+    imageUrl,
+    audioUrl,
+    prompt,
+    duration,
+    resolution,
+  });
+  const videoUrl = response.data?.video?.url;
+  if (videoUrl) {
+    return { videoUrl };
+  }
+  throw new Error("No video URL in result");
+}
+/**
+ * Overlay a lip-synced face onto the original video.
+ *
+ * Placeholder: no compositing happens yet. The lip-synced video is run through
+ * ffmpegProvider.convertFormat into `outputPath`, and `originalVideo` is not read.
+ *
+ * @returns The `outputPath` that was passed in.
+ */
+export async function lipsyncOverlay(options: {
+  originalVideo: string;
+  lipsyncedVideo: string;
+  outputPath: string;
+}): Promise<string> {
+  const source = options.lipsyncedVideo;
+  const destination = options.outputPath;
+  console.log("[sync] overlaying lip-synced video...");
+  // A real overlay needs more complex ffmpeg operations; until then the
+  // lip-synced video is emitted unchanged apart from the format conversion.
+  await ffmpegProvider.convertFormat({ input: source, output: destination });
+  return destination;
+}
+/**
+ * Wav2Lip-style lip sync (placeholder for future implementation).
+ *
+ * No lip sync is performed yet: the input video is run through
+ * ffmpegProvider.convertFormat into `outputPath`, and `audioPath` is not read.
+ *
+ * @param options - Source video, audio and destination paths.
+ * @returns The `outputPath` that was passed in.
+ */
+export async function lipsyncWav2Lip(options: Wav2LipOptions): Promise<string> {
+  // The warning states what really happens: the previous text promised a
+  // wan-25 fallback that this function never invokes.
+  console.warn(
+    "[sync] wav2lip not yet implemented, copying input video without lip sync",
+  );
+  await ffmpegProvider.convertFormat({
+    input: options.videoPath,
+    output: options.outputPath,
+  });
+  return options.outputPath;
+}
+export default definition;

package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} RENAMED Viewed

@@ -1,22 +1,63 @@
-#!/usr/bin/env bun
 /**
- * audio transcription service
- * supports groq whisper, fireworks api, and future providers
+ * Transcription action
+ * Speech-to-text via Groq or Fireworks
  */
 import { writeFileSync } from "node:fs";
-import { join } from "node:path";
 import { toFile } from "groq-sdk/uploads";
+import { z } from "zod";
+import {
+  filePathSchema,
+  transcriptionProviderSchema,
+} from "../../core/schema/shared";
+import type { ActionDefinition, ZodSchema } from "../../core/schema/types";
 import {
   convertFireworksToSRT,
-  transcribeWithFireworks as fireworksTranscribe,
-} from "../../lib/fireworks";
-import { GROQ_MODELS, transcribeAudio as groqTranscribe } from "../../lib/groq";
+  fireworksProvider,
+} from "../../providers/fireworks";
+import { GROQ_MODELS, groqProvider } from "../../providers/groq";
+// What the "transcribe" action accepts
+const transcribeInputSchema = z.object({
+  audio: filePathSchema.describe("Audio/video file to transcribe"),
+  provider: transcriptionProviderSchema
+    .default("groq")
+    .describe("Transcription provider"),
+  output: filePathSchema.optional().describe("Output file path"),
+});
+// What the "transcribe" action produces
+const transcribeOutputSchema = z.object({
+  success: z.boolean(),
+  text: z.string().optional(),
+  srt: z.string().optional(),
+  error: z.string().optional(),
+});
+// Input/output pair handed to the action definition
+const schema: ZodSchema<
+  typeof transcribeInputSchema,
+  typeof transcribeOutputSchema
+> = { input: transcribeInputSchema, output: transcribeOutputSchema };
+/**
+ * "transcribe" action: maps the schema's `audio` and `output` fields onto the
+ * `audioUrl` and `outputPath` options of transcribe.
+ */
+export const definition: ActionDefinition<typeof schema> = {
+  type: "action",
+  name: "transcribe",
+  description: "Speech to text transcription",
+  schema,
+  routes: [],
+  execute: async ({ audio, provider, output }) =>
+    transcribe({ audioUrl: audio, provider, outputPath: output }),
+};
-// types
+// Types
 export interface TranscribeOptions {
-  audioUrl: string; // url or local file path
+  audioUrl: string;
   provider?: "groq" | "fireworks";
   model?: string;
   language?: string;
@@ -31,7 +72,7 @@ export interface TranscribeResult {
   error?: string;
 }
-// groq transcription
+// Groq transcription
 async function transcribeWithGroq(
   audioUrl: string,
   options: {
@@ -43,16 +84,14 @@ async function transcribeWithGroq(
   try {
     console.log("[transcribe] using groq whisper...");
-    // load audio file (local or remote)
+    // Load audio file
     let audioBuffer: ArrayBuffer;
     let fileName = "audio.mp3";
     if (audioUrl.startsWith("http://") || audioUrl.startsWith("https://")) {
-      // fetch remote file
       const audioResponse = await fetch(audioUrl);
       audioBuffer = await audioResponse.arrayBuffer();
     } else {
-      // read local file with bun
       const file = Bun.file(audioUrl);
       audioBuffer = await file.arrayBuffer();
       fileName = audioUrl.split("/").pop() || "audio.mp3";
@@ -60,8 +99,7 @@ async function transcribeWithGroq(
     const audioFile = await toFile(audioBuffer, fileName);
-    // transcribe with groq
-    const text = await groqTranscribe({
+    const text = await groqProvider.transcribeAudio({
       file: audioFile,
       model: options.model || GROQ_MODELS.WHISPER_LARGE,
       language: options.language,
@@ -70,10 +108,8 @@ async function transcribeWithGroq(
     console.log("[transcribe] groq transcription complete");
     if (options.outputFormat === "srt") {
-      // groq returns plain text, so we need to convert to srt
-      // for now just return text with warning
       console.warn(
-        "[transcribe] groq returns plain text, use fireworks for srt format",
+        "[transcribe] groq returns plain text, use fireworks for SRT format",
       );
       return { success: true, text, srt: text };
     }
@@ -84,19 +120,19 @@ async function transcribeWithGroq(
     return {
       success: false,
       error:
-        error instanceof Error ? error.message : "groq transcription failed",
+        error instanceof Error ? error.message : "Groq transcription failed",
     };
   }
 }
-// fireworks transcription (with srt support)
+// Fireworks transcription (with SRT support)
 async function transcribeWithFireworks(
   audioUrl: string,
 ): Promise<TranscribeResult> {
   try {
     console.log("[transcribe] using fireworks api...");
-    const data = await fireworksTranscribe({
+    const data = await fireworksProvider.transcribe({
       audioPath: audioUrl,
     });
@@ -111,12 +147,12 @@ async function transcribeWithFireworks(
       error:
         error instanceof Error
           ? error.message
-          : "fireworks transcription failed",
+          : "Fireworks transcription failed",
     };
   }
 }
-// main transcription function
+// Main transcription function
 export async function transcribe(
   options: TranscribeOptions,
 ): Promise<TranscribeResult> {
@@ -137,7 +173,6 @@ export async function transcribe(
   let result: TranscribeResult;
-  // choose provider
   if (provider === "groq") {
     result = await transcribeWithGroq(audioUrl, {
       model,
@@ -147,10 +182,10 @@ export async function transcribe(
   } else if (provider === "fireworks") {
     result = await transcribeWithFireworks(audioUrl);
   } else {
-    throw new Error(`unknown provider: ${provider}`);
+    throw new Error(`Unknown provider: ${provider}`);
   }
-  // save to file if requested
+  // Save to file if requested
   if (result.success && outputPath) {
     const content = outputFormat === "srt" ? result.srt : result.text;
     if (content) {
@@ -162,66 +197,4 @@ export async function transcribe(
   return result;
 }
-// cli
-async function cli() {
-  const args = process.argv.slice(2);
-  const command = args[0];
-  if (!command || command === "help") {
-    console.log(`
-usage:
-  bun run service/transcribe.ts <audioPath> [provider] [outputPath]
-arguments:
-  audioPath      - url or local path to audio file
-  provider       - groq (default) | fireworks
-  outputPath     - optional path to save transcription
-examples:
-  bun run service/transcribe.ts https://example.com/audio.mp3
-  bun run service/transcribe.ts media/dora.ogg groq
-  bun run service/transcribe.ts https://example.com/audio.mp3 fireworks output.srt
-  bun run service/transcribe.ts media/audio.mp3 groq output.txt
-providers:
-  groq        - ultra-fast whisper (text only, free tier available)
-  fireworks   - slower but includes srt timestamps (uses reels-srt api)
-environment:
-  GROQ_API_KEY - your groq api key (for groq provider)
-    `);
-    process.exit(0);
-  }
-  try {
-    const audioUrl = args[0];
-    const provider = (args[1] || "groq") as "groq" | "fireworks";
-    const outputPath = args[2];
-    if (!audioUrl) {
-      throw new Error("audioUrl is required");
-    }
-    const result = await transcribe({
-      audioUrl,
-      provider,
-      outputFormat: provider === "fireworks" ? "srt" : "text",
-      outputPath: outputPath || join(process.cwd(), "output.txt"),
-    });
-    if (result.success) {
-      console.log("\ntranscription:");
-      console.log(result.srt || result.text);
-    } else {
-      console.error(`\nerror: ${result.error}`);
-      process.exit(1);
-    }
-  } catch (error) {
-    console.error("[transcribe] error:", error);
-    process.exit(1);
-  }
-}
-if (import.meta.main) {
-  cli();
-}
+export default definition;