@zeke/obsx 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -64,3 +64,37 @@ Use a specific directory:
64
64
  ```sh
65
65
  obsx add-images --dir "$PWD"
66
66
  ```
67
+
68
+ ## yolo
69
+
70
+ Use natural language to control OBS. This sends your prompt to Claude along with the current state of your OBS instance, and executes the generated commands.
71
+
72
+ Requires the `ANTHROPIC_API_KEY` environment variable.
73
+
74
+ ```sh
75
+ obsx yolo "start recording"
76
+ obsx yolo "switch to the BRB scene"
77
+ obsx yolo "hide the webcam"
78
+ obsx yolo "add a color source called 'Red Background' to the current scene"
79
+ obsx yolo "mute the mic"
80
+ obsx yolo "set the transition to fade and make it 500ms"
81
+ obsx yolo "move the webcam to the bottom right corner"
82
+ obsx yolo "take a screenshot of the current scene"
83
+ obsx yolo "create a new scene called Interview with two color sources side by side"
84
+ ```
85
+
86
+ ## Development
87
+
88
+ Run locally from the repo without publishing:
89
+
90
+ ```sh
91
+ npm run dev -- <command>
92
+ ```
93
+
94
+ For example:
95
+
96
+ ```sh
97
+ npm run dev -- add-webcam
98
+ npm run dev -- add-webcam --device iphone
99
+ npm run dev -- add-images
100
+ ```
package/dist/cli.js CHANGED
@@ -2,6 +2,7 @@
2
2
  import process from "node:process";
3
3
  import { addImages } from "./commands/add-images.js";
4
4
  import { addWebcam } from "./commands/add-webcam.js";
5
+ import { yolo } from "./commands/yolo.js";
5
6
  function printHelp() {
6
7
  console.log(`obsx - A CLI for OBS
7
8
 
@@ -15,11 +16,13 @@ Environment:
15
16
  Commands:
16
17
  add-images Add image sources for images in a directory (default: cwd)
17
18
  add-webcam Add a webcam input to the current scene
19
+ yolo Use AI to control OBS with natural language
18
20
 
19
21
  Examples:
20
22
  obsx add-images
21
23
  obsx add-images --dir /path/to/images
22
24
  obsx add-webcam --interactive
25
+ obsx yolo "switch to the Gaming scene"
23
26
  `);
24
27
  }
25
28
  async function run(argv) {
@@ -37,6 +40,10 @@ async function run(argv) {
37
40
  await addWebcam(rest);
38
41
  return;
39
42
  }
43
+ if (command === "yolo") {
44
+ await yolo(rest);
45
+ return;
46
+ }
40
47
  console.error(`Unknown command: ${maybeCommand}`);
41
48
  printHelp();
42
49
  process.exitCode = 1;
@@ -4,6 +4,7 @@ import { getObsConnectionOptionsFromEnv, withOBS } from "../lib/obs.js";
4
4
  const DEFAULTS = {
5
5
  interactive: false,
6
6
  baseName: "Video Capture Device",
7
+ baseNameExplicit: false,
7
8
  inputKind: undefined,
8
9
  deviceSelection: undefined,
9
10
  addChromaKey: true,
@@ -30,6 +31,7 @@ function parseArgs(argv) {
30
31
  }
31
32
  if (arg === "--base-name" && typeof next === "string") {
32
33
  out.baseName = next;
34
+ out.baseNameExplicit = true;
33
35
  i += 1;
34
36
  continue;
35
37
  }
@@ -189,6 +191,7 @@ async function resolveOptionsInteractive(initial) {
189
191
  const rl = createInterface({ input: process.stdin, output: process.stdout });
190
192
  try {
191
193
  const baseName = await ask(rl, "Base source name", initial.baseName);
194
+ const baseNameExplicit = baseName !== initial.baseName;
192
195
  const addChromaKey = await askYesNo(rl, "Add Chroma Key filter?", initial.addChromaKey);
193
196
  const addColorCorrection = await askYesNo(rl, "Add Color Correction filter?", initial.addColorCorrection);
194
197
  let saturation = initial.saturation;
@@ -202,6 +205,7 @@ async function resolveOptionsInteractive(initial) {
202
205
  return {
203
206
  ...initial,
204
207
  baseName,
208
+ baseNameExplicit: initial.baseNameExplicit || baseNameExplicit,
205
209
  addChromaKey,
206
210
  addColorCorrection,
207
211
  saturation,
@@ -305,9 +309,22 @@ export async function addWebcam(argv) {
305
309
  },
306
310
  overlay: true,
307
311
  });
312
+ // Rename the source based on the device name, unless the user
313
+ // explicitly provided --base-name.
314
+ let finalName = inputName;
315
+ if (!options.baseNameExplicit && device.itemName) {
316
+ const desiredName = await uniqueInputName(obs, device.itemName);
317
+ if (desiredName !== inputName) {
318
+ await obs.call("SetInputName", {
319
+ inputName,
320
+ newInputName: desiredName,
321
+ });
322
+ finalName = desiredName;
323
+ }
324
+ }
308
325
  if (options.addChromaKey) {
309
326
  await obs.call("CreateSourceFilter", {
310
- sourceName: inputName,
327
+ sourceName: finalName,
311
328
  filterName: "Chroma Key",
312
329
  filterKind: "chroma_key_filter",
313
330
  filterSettings: {},
@@ -315,7 +332,7 @@ export async function addWebcam(argv) {
315
332
  }
316
333
  if (options.addColorCorrection) {
317
334
  await obs.call("CreateSourceFilter", {
318
- sourceName: inputName,
335
+ sourceName: finalName,
319
336
  filterName: "Color Correction",
320
337
  filterKind: "color_filter",
321
338
  filterSettings: {
@@ -325,7 +342,7 @@ export async function addWebcam(argv) {
325
342
  });
326
343
  }
327
344
  const filters = await obs.call("GetSourceFilterList", {
328
- sourceName: inputName,
345
+ sourceName: finalName,
329
346
  });
330
347
  const filterSummaries = filters.filters.map((filter) => ({
331
348
  name: filter.filterName,
@@ -333,7 +350,7 @@ export async function addWebcam(argv) {
333
350
  enabled: filter.filterEnabled,
334
351
  }));
335
352
  console.log("Filters:", filterSummaries);
336
- console.log("Created input:", inputName);
353
+ console.log("Created input:", finalName);
337
354
  console.log("Scene:", sceneName);
338
355
  console.log("Input kind:", inputKind);
339
356
  console.log("Device:", device.itemName);
@@ -0,0 +1,219 @@
1
+ import process from "node:process";
2
+ import Anthropic from "@anthropic-ai/sdk";
3
+ import { getObsConnectionOptionsFromEnv, withOBS } from "../lib/obs.js";
4
// Upper bound on model round-trips per `yolo` invocation: the first request
// plus retries that feed OBS errors back to the model.
const MAX_ATTEMPTS = 3;
// System prompt sent with every model request. It describes the expected
// output (a bare JSON array of { requestType, requestData? } objects), lists
// commonly used OBS WebSocket v5 request types, and states the rules the
// response must follow. The text below is runtime data — edit with care.
const SYSTEM_PROMPT = `You are an OBS Studio automation assistant. You receive a user's natural language request and the current state of their OBS instance, and you respond with a JSON array of OBS WebSocket v5 API calls to fulfill the request.

Each call is an object with "requestType" (string) and optional "requestData" (object).

Available request types (most common ones):

Scenes: GetSceneList, GetCurrentProgramScene, SetCurrentProgramScene (sceneName), CreateScene (sceneName), RemoveScene (sceneName), SetSceneName (sceneName, newSceneName)

Inputs: GetInputList, CreateInput (sceneName, inputName, inputKind, inputSettings?, sceneItemEnabled?), RemoveInput (inputName), SetInputName (inputName, newInputName), GetInputSettings (inputName), SetInputSettings (inputName, inputSettings, overlay?), SetInputMute (inputName, inputMuted), ToggleInputMute (inputName), SetInputVolume (inputName, inputVolumeMul? or inputVolumeDb?), GetInputKindList

Scene Items: GetSceneItemList (sceneName), GetSceneItemId (sceneName, sourceName), SetSceneItemEnabled (sceneName, sceneItemId, sceneItemEnabled), SetSceneItemTransform (sceneName, sceneItemId, sceneItemTransform), SetSceneItemIndex (sceneName, sceneItemId, sceneItemIndex), SetSceneItemLocked (sceneName, sceneItemId, sceneItemLocked), RemoveSceneItem (sceneName, sceneItemId), SetSceneItemBlendMode (sceneName, sceneItemId, sceneItemBlendMode)

Filters: GetSourceFilterList (sourceName), CreateSourceFilter (sourceName, filterName, filterKind, filterSettings?), RemoveSourceFilter (sourceName, filterName), SetSourceFilterEnabled (sourceName, filterName, filterEnabled), SetSourceFilterSettings (sourceName, filterName, filterSettings, overlay?)

Streaming/Recording: StartStream, StopStream, ToggleStream, StartRecord, StopRecord, ToggleRecord, PauseRecord, ResumeRecord, GetStreamStatus, GetRecordStatus

Transitions: GetSceneTransitionList, SetCurrentSceneTransition (transitionName), SetCurrentSceneTransitionDuration (transitionDuration)

General: GetVersion, GetStats, GetVideoSettings, SetVideoSettings (baseWidth, baseHeight, outputWidth, outputHeight, fpsNumerator, fpsDenominator)

Virtual Camera: StartVirtualCam, StopVirtualCam, ToggleVirtualCam

Studio Mode: GetStudioModeEnabled, SetStudioModeEnabled (studioModeEnabled), SetCurrentPreviewScene (sceneName)

Common input kinds (macOS): av_capture_input (video capture), coreaudio_input_capture (audio input), coreaudio_output_capture (audio output), image_source, color_source_v3, text_ft2_source_v2, browser_source, ffmpeg_source (media), window_capture, display_capture

Transform properties: positionX, positionY, scaleX, scaleY, rotation, boundsType (OBS_BOUNDS_NONE, OBS_BOUNDS_STRETCH, OBS_BOUNDS_SCALE_INNER, OBS_BOUNDS_SCALE_OUTER, OBS_BOUNDS_SCALE_TO_WIDTH, OBS_BOUNDS_SCALE_TO_HEIGHT, OBS_BOUNDS_MAX_ONLY), boundsWidth, boundsHeight, cropLeft, cropRight, cropTop, cropBottom, alignment

Rules:
- Respond with ONLY a JSON array. No explanation, no markdown fences, no extra text.
- Each element must have "requestType" and optionally "requestData".
- The calls will be executed sequentially in order.
- Use the current OBS state provided to reference correct scene names, input names, and scene item IDs.
- If you need to get information first (like a sceneItemId), you cannot do that in this single response. Use the state provided.
- Be practical: if asked to "hide" something, use SetSceneItemEnabled with false. If asked to "show", use true.
- For positioning, the canvas origin (0,0) is top-left.`;
41
/**
 * Build a plain-text snapshot of the OBS instance for inclusion in the model prompt.
 *
 * Each section (version, video settings, scenes, items of the current scene,
 * inputs, stream status, record status) is queried independently and in that
 * order. A section whose query throws is simply left out, so the snapshot is
 * best-effort and may be empty.
 *
 * @param {{ call: Function }} obs - Client exposing an async `call(requestType, requestData?)`.
 * @returns {Promise<string>} The collected sections joined with newlines.
 */
async function gatherObsState(obs) {
    const lines = [];
    // Run one section; any failure only drops that section from the snapshot.
    const attempt = async (section) => {
        try {
            await section();
        }
        catch {
            // ignore
        }
    };
    await attempt(async () => {
        const info = await obs.call("GetVersion");
        lines.push(`OBS Version: ${info.obsVersion}, Platform: ${info.platform}`);
    });
    await attempt(async () => {
        const settings = await obs.call("GetVideoSettings");
        lines.push(`Canvas: ${settings.baseWidth}x${settings.baseHeight}, Output: ${settings.outputWidth}x${settings.outputHeight}`);
    });
    await attempt(async () => {
        const sceneList = await obs.call("GetSceneList");
        lines.push(`Current scene: ${sceneList.currentProgramSceneName}`);
        lines.push(`Scenes: ${JSON.stringify(sceneList.scenes)}`);
    });
    await attempt(async () => {
        // The program scene is re-queried here so this section does not
        // depend on the scene-list section having succeeded.
        const program = await obs.call("GetCurrentProgramScene");
        const itemList = await obs.call("GetSceneItemList", {
            sceneName: program.currentProgramSceneName,
        });
        lines.push(`Scene items in "${program.currentProgramSceneName}": ${JSON.stringify(itemList.sceneItems)}`);
    });
    await attempt(async () => {
        const inputList = await obs.call("GetInputList");
        lines.push(`Inputs: ${JSON.stringify(inputList.inputs)}`);
    });
    await attempt(async () => {
        const streamStatus = await obs.call("GetStreamStatus");
        lines.push(`Stream: active=${streamStatus.outputActive}`);
    });
    await attempt(async () => {
        const recordStatus = await obs.call("GetRecordStatus");
        lines.push(`Record: active=${recordStatus.outputActive}, paused=${recordStatus.outputPaused}`);
    });
    return lines.join("\n");
}
98
/**
 * Parse the model's reply into a list of OBS WebSocket calls.
 *
 * The reply is expected to be a JSON array of objects shaped like
 * `{ requestType: string, requestData?: object }`. A surrounding markdown code
 * fence (with or without a language tag) is tolerated and removed first.
 *
 * @param {string} text - Raw text returned by the model.
 * @returns {{ requestType: string, requestData: (object|undefined) }[]} Normalized calls, in order.
 * @throws {SyntaxError} If the text is not valid JSON.
 * @throws {Error} If the JSON is not an array, or an element lacks a usable "requestType".
 */
function parseCallsFromResponse(text) {
    // Strip markdown fences if the model wrapped the response. Any language
    // tag is accepted (```json, ```javascript, ...), not just "json".
    let cleaned = text.trim();
    if (cleaned.startsWith("```")) {
        cleaned = cleaned.replace(/^```[\w-]*\s*\n?/, "").replace(/\n?```\s*$/, "");
    }
    const parsed = JSON.parse(cleaned);
    if (!Array.isArray(parsed)) {
        throw new Error("Expected a JSON array of OBS calls");
    }
    return parsed.map((item, i) => {
        if (typeof item !== "object" || item === null || !("requestType" in item)) {
            throw new Error(`Call at index ${i} is missing "requestType"`);
        }
        const { requestType, requestData } = item;
        // The key being present is not enough: it must name a request.
        if (typeof requestType !== "string" || requestType.trim() === "") {
            throw new Error(`Call at index ${i} has an invalid "requestType" (expected a non-empty string)`);
        }
        return {
            requestType,
            // Normalize an explicit null to "no request data".
            requestData: requestData ?? undefined,
        };
    });
}
119
/**
 * `obsx yolo <prompt>`: turn a natural-language request into OBS calls and run them.
 *
 * The words in `argv` are joined into one prompt. Inside the `withOBS` callback
 * the command snapshots the OBS state, asks the model for a JSON array of
 * calls, and executes them one by one. If any call fails, the errors and a
 * fresh snapshot are sent back to the model, up to MAX_ATTEMPTS rounds in total.
 *
 * Sets `process.exitCode = 1` when the prompt or API key is missing, the model
 * returns no text, the reply cannot be parsed, or calls are still failing after
 * the last attempt.
 *
 * @param {string[]} argv - Command-line words following `yolo`.
 * @returns {Promise<void>}
 */
export async function yolo(argv) {
    const request = argv.join(" ").trim();
    if (!request) {
        console.error("Usage: obsx yolo <prompt>");
        console.error('Example: obsx yolo "switch to the Gaming scene"');
        process.exitCode = 1;
        return;
    }
    const { ANTHROPIC_API_KEY: apiKey } = process.env;
    if (!apiKey) {
        console.error("ANTHROPIC_API_KEY environment variable is required for the yolo command.");
        process.exitCode = 1;
        return;
    }
    const anthropic = new Anthropic({ apiKey });
    // Render a call as "<requestType>" or "<requestType> <json data>"; used
    // both for the progress log and for the error feedback sent to the model.
    const describeCall = ({ requestType, requestData }) => {
        const suffix = requestData ? ` ${JSON.stringify(requestData)}` : "";
        return `${requestType}${suffix}`;
    };
    await withOBS(getObsConnectionOptionsFromEnv(), async (obs) => {
        // Full conversation so far; each retry appends to it so the model
        // sees its earlier answer alongside the resulting errors.
        const conversation = [];
        let failures = [];
        for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            // Re-read the state every round: earlier calls may have changed it.
            const state = await gatherObsState(obs);
            let userContent;
            if (attempt === 1) {
                console.log("Asking Claude...");
                userContent = `Current OBS state:\n${state}\n\nRequest: ${request}`;
            }
            else {
                // The previous assistant reply is already in the conversation;
                // follow it with the errors and the fresh state.
                const errorLines = failures
                    .map((failure) => `- ${describeCall(failure.call)}: ${failure.error}`)
                    .join("\n");
                userContent = `Some calls failed. Here are the errors:\n${errorLines}\n\nUpdated OBS state:\n${state}\n\nPlease generate a corrected JSON array of OBS calls to complete the original request. Only include calls that still need to succeed — don't repeat calls that already worked.`;
            }
            conversation.push({ role: "user", content: userContent });
            const reply = await anthropic.messages.create({
                model: "claude-sonnet-4-20250514",
                max_tokens: 4096,
                system: SYSTEM_PROMPT,
                messages: conversation,
            });
            // Only the first content block is considered, and only if it is text.
            const firstBlock = reply.content[0];
            const replyText = firstBlock?.type === "text" ? firstBlock.text : "";
            // Keep conversation history for potential retries.
            conversation.push({ role: "assistant", content: replyText });
            if (!replyText) {
                console.error("No response from Claude.");
                process.exitCode = 1;
                return;
            }
            let calls;
            try {
                calls = parseCallsFromResponse(replyText);
            }
            catch (err) {
                console.error("Failed to parse Claude's response as OBS calls:");
                console.error(replyText);
                console.error(err instanceof Error ? err.message : String(err));
                process.exitCode = 1;
                return;
            }
            if (calls.length === 0) {
                console.log("No OBS calls to execute.");
                return;
            }
            const attemptLabel = attempt > 1 ? ` (attempt ${attempt}/${MAX_ATTEMPTS})` : "";
            console.log(`Executing ${calls.length} OBS call(s)${attemptLabel}:\n`);
            failures = [];
            for (const call of calls) {
                console.log(` ${describeCall(call)}`);
                try {
                    const result = await obs.call(call.requestType, call.requestData);
                    if (result != null) {
                        // Skip printing results that carry no information.
                        const serialized = JSON.stringify(result);
                        if (serialized !== "{}" && serialized !== "undefined") {
                            console.log(` -> ${serialized}`);
                        }
                    }
                }
                catch (err) {
                    const reason = err instanceof Error ? err.message : String(err);
                    console.error(` !! Error: ${reason}`);
                    failures.push({ call, error: reason });
                }
            }
            if (failures.length === 0) {
                console.log("\nDone.");
                return;
            }
            if (attempt >= MAX_ATTEMPTS) {
                console.error(`\n${failures.length} call(s) still failing after ${MAX_ATTEMPTS} attempts.`);
                process.exitCode = 1;
            }
            else {
                console.log(`\n${failures.length} call(s) failed. Retrying with error feedback...`);
            }
        }
    });
}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@zeke/obsx",
3
3
  "description": "A CLI for OBS",
4
- "version": "1.0.3",
4
+ "version": "1.1.0",
5
5
  "license": "MIT",
6
6
  "repository": "https://github.com/zeke/obsx",
7
7
  "type": "module",
@@ -23,11 +23,12 @@
23
23
  "access": "public"
24
24
  },
25
25
  "dependencies": {
26
+ "@anthropic-ai/sdk": "^0.74.0",
26
27
  "obs-websocket-js": "^5.0.4"
27
28
  },
28
29
  "devDependencies": {
29
- "@types/node": "^20.11.30",
30
30
  "@eslint/js": "^9.20.0",
31
+ "@types/node": "^20.11.30",
31
32
  "eslint": "^9.20.1",
32
33
  "tsx": "^4.19.1",
33
34
  "typescript": "^5.4.2",