npm - @zeke/obsx - Versions diffs - 1.0.3 → 1.1.0 - Mend

@zeke/obsx 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md +34 -0
package/dist/cli.js +7 -0
package/dist/commands/add-webcam.js +21 -4
package/dist/commands/yolo.js +219 -0
package/package.json +3 -2

package/README.md CHANGED Viewed

@@ -64,3 +64,37 @@ Use a specific directory:
 ```sh
 obsx add-images --dir "$PWD"
 ```
+## yolo
+Use natural language to control OBS. This sends your prompt to Claude along with the current state of your OBS instance, and executes the generated commands.
+Requires the `ANTHROPIC_API_KEY` environment variable.
+```sh
+obsx yolo "start recording"
+obsx yolo "switch to the BRB scene"
+obsx yolo "hide the webcam"
+obsx yolo "add a color source called 'Red Background' to the current scene"
+obsx yolo "mute the mic"
+obsx yolo "set the transition to fade and make it 500ms"
+obsx yolo "move the webcam to the bottom right corner"
+obsx yolo "take a screenshot of the current scene"
+obsx yolo "create a new scene called Interview with two color sources side by side"
+```
+## Development
+Run locally from the repo without publishing:
+```sh
+npm run dev -- <command>
+```
+For example:
+```sh
+npm run dev -- add-webcam
+npm run dev -- add-webcam --device iphone
+npm run dev -- add-images
+```

package/dist/cli.js CHANGED Viewed

@@ -2,6 +2,7 @@
 import process from "node:process";
 import { addImages } from "./commands/add-images.js";
 import { addWebcam } from "./commands/add-webcam.js";
+import { yolo } from "./commands/yolo.js";
 function printHelp() {
     console.log(`obsx - A CLI for OBS
@@ -15,11 +16,13 @@ Environment:
 Commands:
   add-images   Add image sources for images in a directory (default: cwd)
   add-webcam   Add a webcam input to the current scene
+  yolo         Use AI to control OBS with natural language
 Examples:
   obsx add-images
   obsx add-images --dir /path/to/images
   obsx add-webcam --interactive
+  obsx yolo "switch to the Gaming scene"
 `);
 }
 async function run(argv) {
@@ -37,6 +40,10 @@ async function run(argv) {
         await addWebcam(rest);
         return;
     }
+    if (command === "yolo") {
+        await yolo(rest);
+        return;
+    }
     console.error(`Unknown command: ${maybeCommand}`);
     printHelp();
     process.exitCode = 1;

package/dist/commands/add-webcam.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { getObsConnectionOptionsFromEnv, withOBS } from "../lib/obs.js";
 const DEFAULTS = {
     interactive: false,
     baseName: "Video Capture Device",
+    baseNameExplicit: false,
     inputKind: undefined,
     deviceSelection: undefined,
     addChromaKey: true,
@@ -30,6 +31,7 @@ function parseArgs(argv) {
         }
         if (arg === "--base-name" && typeof next === "string") {
             out.baseName = next;
+            out.baseNameExplicit = true;
             i += 1;
             continue;
         }
@@ -189,6 +191,7 @@ async function resolveOptionsInteractive(initial) {
     const rl = createInterface({ input: process.stdin, output: process.stdout });
     try {
         const baseName = await ask(rl, "Base source name", initial.baseName);
+        const baseNameExplicit = baseName !== initial.baseName;
         const addChromaKey = await askYesNo(rl, "Add Chroma Key filter?", initial.addChromaKey);
         const addColorCorrection = await askYesNo(rl, "Add Color Correction filter?", initial.addColorCorrection);
         let saturation = initial.saturation;
@@ -202,6 +205,7 @@ async function resolveOptionsInteractive(initial) {
         return {
             ...initial,
             baseName,
+            baseNameExplicit: initial.baseNameExplicit || baseNameExplicit,
             addChromaKey,
             addColorCorrection,
             saturation,
@@ -305,9 +309,22 @@ export async function addWebcam(argv) {
             },
             overlay: true,
         });
+        // Rename the source based on the device name, unless the user
+        // explicitly provided --base-name.
+        let finalName = inputName;
+        if (!options.baseNameExplicit && device.itemName) {
+            const desiredName = await uniqueInputName(obs, device.itemName);
+            if (desiredName !== inputName) {
+                await obs.call("SetInputName", {
+                    inputName,
+                    newInputName: desiredName,
+                });
+                finalName = desiredName;
+            }
+        }
         if (options.addChromaKey) {
             await obs.call("CreateSourceFilter", {
-                sourceName: inputName,
+                sourceName: finalName,
                 filterName: "Chroma Key",
                 filterKind: "chroma_key_filter",
                 filterSettings: {},
@@ -315,7 +332,7 @@ export async function addWebcam(argv) {
         }
         if (options.addColorCorrection) {
             await obs.call("CreateSourceFilter", {
-                sourceName: inputName,
+                sourceName: finalName,
                 filterName: "Color Correction",
                 filterKind: "color_filter",
                 filterSettings: {
@@ -325,7 +342,7 @@ export async function addWebcam(argv) {
             });
         }
         const filters = await obs.call("GetSourceFilterList", {
-            sourceName: inputName,
+            sourceName: finalName,
         });
         const filterSummaries = filters.filters.map((filter) => ({
             name: filter.filterName,
@@ -333,7 +350,7 @@ export async function addWebcam(argv) {
             enabled: filter.filterEnabled,
         }));
         console.log("Filters:", filterSummaries);
-        console.log("Created input:", inputName);
+        console.log("Created input:", finalName);
         console.log("Scene:", sceneName);
         console.log("Input kind:", inputKind);
         console.log("Device:", device.itemName);

package/dist/commands/yolo.js ADDED Viewed

@@ -0,0 +1,219 @@
+import process from "node:process";
+import Anthropic from "@anthropic-ai/sdk";
+import { getObsConnectionOptionsFromEnv, withOBS } from "../lib/obs.js";
+const MAX_ATTEMPTS = 3; // Cap on ask/execute rounds in yolo(); SYSTEM_PROMPT below is passed as the `system` message on every round.
+const SYSTEM_PROMPT = `You are an OBS Studio automation assistant. You receive a user's natural language request and the current state of their OBS instance, and you respond with a JSON array of OBS WebSocket v5 API calls to fulfill the request.
+Each call is an object with "requestType" (string) and optional "requestData" (object).
+Available request types (most common ones):
+Scenes: GetSceneList, GetCurrentProgramScene, SetCurrentProgramScene (sceneName), CreateScene (sceneName), RemoveScene (sceneName), SetSceneName (sceneName, newSceneName)
+Inputs: GetInputList, CreateInput (sceneName, inputName, inputKind, inputSettings?, sceneItemEnabled?), RemoveInput (inputName), SetInputName (inputName, newInputName), GetInputSettings (inputName), SetInputSettings (inputName, inputSettings, overlay?), SetInputMute (inputName, inputMuted), ToggleInputMute (inputName), SetInputVolume (inputName, inputVolumeMul? or inputVolumeDb?), GetInputKindList
+Scene Items: GetSceneItemList (sceneName), GetSceneItemId (sceneName, sourceName), SetSceneItemEnabled (sceneName, sceneItemId, sceneItemEnabled), SetSceneItemTransform (sceneName, sceneItemId, sceneItemTransform), SetSceneItemIndex (sceneName, sceneItemId, sceneItemIndex), SetSceneItemLocked (sceneName, sceneItemId, sceneItemLocked), RemoveSceneItem (sceneName, sceneItemId), SetSceneItemBlendMode (sceneName, sceneItemId, sceneItemBlendMode)
+Filters: GetSourceFilterList (sourceName), CreateSourceFilter (sourceName, filterName, filterKind, filterSettings?), RemoveSourceFilter (sourceName, filterName), SetSourceFilterEnabled (sourceName, filterName, filterEnabled), SetSourceFilterSettings (sourceName, filterName, filterSettings, overlay?)
+Streaming/Recording: StartStream, StopStream, ToggleStream, StartRecord, StopRecord, ToggleRecord, PauseRecord, ResumeRecord, GetStreamStatus, GetRecordStatus
+Transitions: GetSceneTransitionList, SetCurrentSceneTransition (transitionName), SetCurrentSceneTransitionDuration (transitionDuration)
+General: GetVersion, GetStats, GetVideoSettings, SetVideoSettings (baseWidth, baseHeight, outputWidth, outputHeight, fpsNumerator, fpsDenominator)
+Virtual Camera: StartVirtualCam, StopVirtualCam, ToggleVirtualCam
+Studio Mode: GetStudioModeEnabled, SetStudioModeEnabled (studioModeEnabled), SetCurrentPreviewScene (sceneName)
+Common input kinds (macOS): av_capture_input (video capture), coreaudio_input_capture (audio input), coreaudio_output_capture (audio output), image_source, color_source_v3, text_ft2_source_v2, browser_source, ffmpeg_source (media), window_capture, display_capture
+Transform properties: positionX, positionY, scaleX, scaleY, rotation, boundsType (OBS_BOUNDS_NONE, OBS_BOUNDS_STRETCH, OBS_BOUNDS_SCALE_INNER, OBS_BOUNDS_SCALE_OUTER, OBS_BOUNDS_SCALE_TO_WIDTH, OBS_BOUNDS_SCALE_TO_HEIGHT, OBS_BOUNDS_MAX_ONLY), boundsWidth, boundsHeight, cropLeft, cropRight, cropTop, cropBottom, alignment
+Rules:
+- Respond with ONLY a JSON array. No explanation, no markdown fences, no extra text.
+- Each element must have "requestType" and optionally "requestData".
+- The calls will be executed sequentially in order.
+- Use the current OBS state provided to reference correct scene names, input names, and scene item IDs.
+- If you need to get information first (like a sceneItemId), you cannot do that in this single response. Use the state provided.
+- Be practical: if asked to "hide" something, use SetSceneItemEnabled with false. If asked to "show", use true.
+- For positioning, the canvas origin (0,0) is top-left.`;
+/**
+ * Builds a plain-text snapshot of the OBS instance to send along with the prompt.
+ *
+ * Queries run one after another and each is best-effort: if a request rejects,
+ * its section is left out of the snapshot instead of aborting the command.
+ * @param {object} obs - Client whose `call(requestType, requestData?)` result is awaited.
+ * @returns {Promise<string>} Snapshot lines joined with "\n" ("" if every query failed).
+ */
+async function gatherObsState(obs) {
+    const lines = [];
+    const section = async (collect) => {
+        try {
+            await collect();
+        }
+        catch {
+            // Best-effort: a failed query just omits its section.
+        }
+    };
+    // OBS version and platform.
+    await section(async () => {
+        const { obsVersion, platform } = await obs.call("GetVersion");
+        lines.push(`OBS Version: ${obsVersion}, Platform: ${platform}`);
+    });
+    // Canvas (base) and output resolution.
+    await section(async () => {
+        const { baseWidth, baseHeight, outputWidth, outputHeight } = await obs.call("GetVideoSettings");
+        lines.push(`Canvas: ${baseWidth}x${baseHeight}, Output: ${outputWidth}x${outputHeight}`);
+    });
+    // Current program scene plus the full scene list.
+    await section(async () => {
+        const { currentProgramSceneName, scenes } = await obs.call("GetSceneList");
+        lines.push(`Current scene: ${currentProgramSceneName}`);
+        lines.push(`Scenes: ${JSON.stringify(scenes)}`);
+    });
+    // Items of the current program scene.
+    await section(async () => {
+        const { currentProgramSceneName: sceneName } = await obs.call("GetCurrentProgramScene");
+        const { sceneItems } = await obs.call("GetSceneItemList", { sceneName });
+        lines.push(`Scene items in "${sceneName}": ${JSON.stringify(sceneItems)}`);
+    });
+    // All inputs.
+    await section(async () => {
+        const { inputs } = await obs.call("GetInputList");
+        lines.push(`Inputs: ${JSON.stringify(inputs)}`);
+    });
+    // Streaming status.
+    await section(async () => {
+        const { outputActive } = await obs.call("GetStreamStatus");
+        lines.push(`Stream: active=${outputActive}`);
+    });
+    // Recording status.
+    await section(async () => {
+        const { outputActive, outputPaused } = await obs.call("GetRecordStatus");
+        lines.push(`Record: active=${outputActive}, paused=${outputPaused}`);
+    });
+    return lines.join("\n");
+}
+/** Parses the model's reply into `{ requestType, requestData }` calls, validated up front so a malformed batch is rejected (Error, or JSON.parse's SyntaxError) before any call is sent to OBS. */
+function parseCallsFromResponse(text) {
+    // The system prompt forbids markdown fences, but tolerate a wrapping fence anyway.
+    let cleaned = text.trim();
+    if (cleaned.startsWith("```")) {
+        cleaned = cleaned.replace(/^```(?:json)?\s*\n?/, "").replace(/\n?```\s*$/, "");
+    }
+    const parsed = JSON.parse(cleaned);
+    if (!Array.isArray(parsed)) {
+        throw new Error("Expected a JSON array of OBS calls");
+    }
+    return parsed.map((item, i) => {
+        if (typeof item !== "object" || item === null || !("requestType" in item)) {
+            throw new Error(`Call at index ${i} is missing "requestType"`);
+        }
+        if (typeof item.requestType !== "string" || item.requestType === "") {
+            throw new Error(`Call at index ${i} has an invalid "requestType"`);
+        }
+        return { requestType: item.requestType, requestData: item.requestData ?? undefined };
+    });
+}
+/**
+ * `obsx yolo <prompt>`: asks Claude for OBS WebSocket calls, runs them in order,
+ * and reports failed calls back to the model for up to MAX_ATTEMPTS rounds.
+ * Usage errors, a missing API key, an empty or unparseable reply, and calls still failing after the last round set `process.exitCode = 1`.
+ * @param {string[]} argv - Prompt words; they are joined with single spaces.
+ */
+export async function yolo(argv) {
+    const prompt = argv.join(" ").trim();
+    if (prompt === "") {
+        console.error("Usage: obsx yolo <prompt>");
+        console.error('Example: obsx yolo "switch to the Gaming scene"');
+        process.exitCode = 1;
+        return;
+    }
+    const apiKey = process.env.ANTHROPIC_API_KEY;
+    if (!apiKey) {
+        console.error("ANTHROPIC_API_KEY environment variable is required for the yolo command.");
+        process.exitCode = 1;
+        return;
+    }
+    const anthropic = new Anthropic({ apiKey });
+    // Renders one failed call as a bullet line for the retry message.
+    const describeFailure = ({ call, error }) => `- ${call.requestType}${call.requestData ? " " + JSON.stringify(call.requestData) : ""}: ${error}`;
+    await withOBS(getObsConnectionOptionsFromEnv(), async (obs) => {
+        const conversation = [];
+        let failures = [];
+        for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt += 1) {
+            const state = await gatherObsState(obs);
+            let content;
+            if (attempt > 1) {
+                // The previous assistant reply is already in the conversation; follow it with the errors and fresh state.
+                const errorList = failures.map(describeFailure).join("\n");
+                content = `Some calls failed. Here are the errors:\n${errorList}\n\nUpdated OBS state:\n${state}\n\nPlease generate a corrected JSON array of OBS calls to complete the original request. Only include calls that still need to succeed — don't repeat calls that already worked.`;
+            } else {
+                console.log("Asking Claude...");
+                content = `Current OBS state:\n${state}\n\nRequest: ${prompt}`;
+            }
+            conversation.push({ role: "user", content });
+            const reply = await anthropic.messages.create({
+                model: "claude-sonnet-4-20250514",
+                max_tokens: 4096,
+                system: SYSTEM_PROMPT,
+                messages: conversation,
+            });
+            const firstBlock = reply.content[0];
+            const responseText = firstBlock?.type === "text" ? firstBlock.text : "";
+            // Keep conversation history for potential retries.
+            conversation.push({ role: "assistant", content: responseText });
+            if (!responseText) {
+                console.error("No response from Claude.");
+                process.exitCode = 1;
+                return;
+            }
+            let calls;
+            try {
+                calls = parseCallsFromResponse(responseText);
+            } catch (err) {
+                console.error("Failed to parse Claude's response as OBS calls:");
+                console.error(responseText);
+                console.error(err instanceof Error ? err.message : String(err));
+                process.exitCode = 1;
+                return;
+            }
+            if (calls.length === 0) {
+                console.log("No OBS calls to execute.");
+                return;
+            }
+            const label = attempt > 1 ? ` (attempt ${attempt}/${MAX_ATTEMPTS})` : "";
+            console.log(`Executing ${calls.length} OBS call(s)${label}:\n`);
+            failures = [];
+            for (const call of calls) {
+                const { requestType, requestData } = call;
+                const dataStr = requestData ? ` ${JSON.stringify(requestData)}` : "";
+                console.log(`  ${requestType}${dataStr}`);
+                try {
+                    const result = await obs.call(requestType, requestData);
+                    if (result != null) {
+                        const rendered = JSON.stringify(result);
+                        if (!(rendered === "{}" || rendered === "undefined")) {
+                            console.log(`    -> ${rendered}`);
+                        }
+                    }
+                } catch (err) {
+                    const msg = err instanceof Error ? err.message : String(err);
+                    console.error(`    !! Error: ${msg}`);
+                    failures.push({ call, error: msg });
+                }
+            }
+            if (failures.length === 0) {
+                console.log("\nDone.");
+                return;
+            }
+            if (attempt === MAX_ATTEMPTS) {
+                console.error(`\n${failures.length} call(s) still failing after ${MAX_ATTEMPTS} attempts.`);
+                process.exitCode = 1;
+            } else {
+                console.log(`\n${failures.length} call(s) failed. Retrying with error feedback...`);
+            }
+        }
+    });
+}

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@zeke/obsx",
   "description": "A CLI for OBS",
-  "version": "1.0.3",
+  "version": "1.1.0",
   "license": "MIT",
   "repository": "https://github.com/zeke/obsx",
   "type": "module",
@@ -23,11 +23,12 @@
     "access": "public"
   },
   "dependencies": {
+    "@anthropic-ai/sdk": "^0.74.0",
     "obs-websocket-js": "^5.0.4"
   },
   "devDependencies": {
-    "@types/node": "^20.11.30",
     "@eslint/js": "^9.20.0",
+    "@types/node": "^20.11.30",
     "eslint": "^9.20.1",
     "tsx": "^4.19.1",
     "typescript": "^5.4.2",