clipwise 0.5.2 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -11,7 +11,7 @@ var __export = (target, all) => {
11
11
 
12
12
  // src/script/types.ts
13
13
  import { z } from "zod";
14
- var SafeSelectorSchema, NavigateActionSchema, ClickActionSchema, TypeActionSchema, ScrollActionSchema, WaitActionSchema, HoverActionSchema, ScreenshotActionSchema, WaitForSelectorActionSchema, WaitForNavigationActionSchema, WaitForURLActionSchema, WaitForFunctionActionSchema, WaitForResponseActionSchema, StepActionSchema, ZoomIntensitySchema, AutoZoomConfigSchema, ZoomEffectSchema, CursorEffectSchema, BackgroundSchema, DeviceFrameSchema, SpeedRampConfigSchema, KeystrokeConfigSchema, WatermarkConfigSchema, EffectsConfigSchema, OutputConfigSchema, StepSchema, ScenarioSchema;
14
+ var SafeSelectorSchema, NavigateActionSchema, ClickActionSchema, TypeActionSchema, ScrollActionSchema, WaitActionSchema, HoverActionSchema, ScreenshotActionSchema, WaitForSelectorActionSchema, WaitForNavigationActionSchema, WaitForURLActionSchema, WaitForFunctionActionSchema, WaitForResponseActionSchema, StepActionSchema, ZoomIntensitySchema, AutoZoomConfigSchema, ZoomEffectSchema, CursorEffectSchema, BackgroundSchema, DeviceFrameSchema, SpeedRampConfigSchema, KeystrokeConfigSchema, WatermarkConfigSchema, EffectsConfigSchema, OutputConfigSchema, StepEffectsOverrideSchema, TransitionTypeSchema, StepSchema, AudioConfigSchema, ScenarioSchema;
15
15
  var init_types = __esm({
16
16
  "src/script/types.ts"() {
17
17
  "use strict";
@@ -119,15 +119,16 @@ var init_types = __esm({
119
119
  enabled: z.boolean().default(true),
120
120
  /**
121
121
  * Numeric zoom scale (1.0 = no zoom). Overridden by `intensity` when set.
122
- * Default lowered from 1.8 → 1.35 to match "moderate" intensity.
122
+ * Default: 1.25 to match "light" intensity (industry standard).
123
123
  */
124
- scale: z.number().min(1).max(5).default(1.35),
124
+ scale: z.number().min(1).max(5).default(1.25),
125
125
  /**
126
126
  * Intensity preset — overrides `scale` when set.
127
127
  * Calibrated against Loom (light≈1.25x) and Camtasia (moderate≈1.35x).
128
+ * Default: "light" (1.25x) — matches industry standard (Screen Studio, Loom).
128
129
  */
129
- intensity: ZoomIntensitySchema.optional(),
130
- duration: z.number().default(600),
130
+ intensity: ZoomIntensitySchema.default("light"),
131
+ duration: z.number().default(800),
131
132
  easing: z.enum(["ease-in-out", "ease-in", "ease-out", "linear"]).default("ease-in-out"),
132
133
  autoZoom: AutoZoomConfigSchema.default({})
133
134
  });
@@ -135,7 +136,7 @@ var init_types = __esm({
135
136
  enabled: z.boolean().default(true),
136
137
  size: z.number().default(20),
137
138
  color: z.string().default("#000000"),
138
- speed: z.enum(["fast", "normal", "slow"]).default("fast"),
139
+ speed: z.enum(["fast", "normal", "slow"]).default("normal"),
139
140
  smoothing: z.boolean().default(true),
140
141
  clickEffect: z.boolean().default(true),
141
142
  clickColor: z.string().default("rgba(59, 130, 246, 0.3)"),
@@ -161,7 +162,7 @@ var init_types = __esm({
161
162
  });
162
163
  SpeedRampConfigSchema = z.object({
163
164
  enabled: z.boolean().default(false),
164
- idleSpeed: z.number().min(0.5).max(8).default(3),
165
+ idleSpeed: z.number().min(0.5).max(8).default(2),
165
166
  actionSpeed: z.number().min(0.25).max(2).default(0.8),
166
167
  transitionFrames: z.number().default(15)
167
168
  });
@@ -203,10 +204,11 @@ var init_types = __esm({
203
204
  watermark: WatermarkConfigSchema.default({})
204
205
  });
205
206
  OutputConfigSchema = z.object({
206
- format: z.enum(["gif", "mp4", "webm", "png-sequence"]).default("gif"),
207
+ format: z.enum(["gif", "mp4", "webm", "png-sequence"]).default("mp4"),
207
208
  width: z.number().default(1280),
208
209
  height: z.number().default(800),
209
210
  fps: z.number().min(1).max(60).default(30),
211
+ /** @deprecated Use `preset` instead. Will be removed in v0.7. */
210
212
  quality: z.number().min(1).max(100).default(80),
211
213
  // Encoding preset for MP4 output. Overrides quality when set.
212
214
  // social — optimized for Twitter/X and YouTube (CRF 25, capped bitrate)
@@ -216,12 +218,40 @@ var init_types = __esm({
216
218
  outputDir: z.string().default("./output"),
217
219
  filename: z.string().default("clipwise-recording")
218
220
  });
221
+ StepEffectsOverrideSchema = z.object({
222
+ zoom: ZoomEffectSchema.partial().optional(),
223
+ cursor: CursorEffectSchema.partial().optional(),
224
+ background: BackgroundSchema.partial().optional(),
225
+ deviceFrame: DeviceFrameSchema.partial().optional(),
226
+ speedRamp: SpeedRampConfigSchema.partial().optional(),
227
+ keystroke: KeystrokeConfigSchema.partial().optional(),
228
+ watermark: WatermarkConfigSchema.partial().optional()
229
+ }).optional();
230
+ TransitionTypeSchema = z.enum([
231
+ "none",
232
+ "fade",
233
+ "slide-left",
234
+ "slide-up",
235
+ "blur"
236
+ ]);
219
237
  StepSchema = z.object({
220
238
  name: z.string().optional(),
221
239
  actions: z.array(StepActionSchema),
222
240
  captureDelay: z.number().default(300),
223
241
  holdDuration: z.number().default(1500),
224
- transition: z.enum(["fade", "none"]).default("none")
242
+ transition: TransitionTypeSchema.default("none"),
243
+ /** Per-step effects override — merges with global effects config. */
244
+ effects: StepEffectsOverrideSchema
245
+ });
246
+ AudioConfigSchema = z.object({
247
+ /** Path to the audio file (MP3, WAV, AAC, etc.). */
248
+ file: z.string().min(1),
249
+ /** Volume level (0.0 = silent, 1.0 = full). */
250
+ volume: z.number().min(0).max(1).default(1),
251
+ /** Fade-in duration in milliseconds. */
252
+ fadeIn: z.number().min(0).default(0),
253
+ /** Fade-out duration in milliseconds. */
254
+ fadeOut: z.number().min(0).default(0)
225
255
  });
226
256
  ScenarioSchema = z.object({
227
257
  name: z.string(),
@@ -232,6 +262,8 @@ var init_types = __esm({
232
262
  }).default({}),
233
263
  effects: EffectsConfigSchema.default({}),
234
264
  output: OutputConfigSchema.default({}),
265
+ /** Optional audio narration — muxed into MP4 output. */
266
+ audio: AudioConfigSchema.optional(),
235
267
  steps: z.array(StepSchema).min(1)
236
268
  });
237
269
  }
@@ -472,12 +504,13 @@ var ClipwiseRecorder = class {
472
504
  * each input field's text on a separate line. */
473
505
  keystrokeSessionId = 0;
474
506
  currentStepIndex = 0;
507
+ isScrolling = false;
475
508
  cursorPosition = { x: 0, y: 0 };
476
509
  viewport = { width: 1280, height: 800 };
477
510
  deviceScaleFactor = 1;
478
511
  isCapturing = false;
479
512
  targetFps = 30;
480
- cursorSpeed = "fast";
513
+ cursorSpeed = "normal";
481
514
  firstContentTimestamp = 0;
482
515
  pendingResponsePromises = /* @__PURE__ */ new Map();
483
516
  // ── 중복 프레임 제거 (Phase 1-A) ──────────────────────────────────────────
@@ -510,6 +543,7 @@ var ClipwiseRecorder = class {
510
543
  this.keystrokeTimeline = [];
511
544
  this.keystrokeSessionId = 0;
512
545
  this.currentStepIndex = 0;
546
+ this.isScrolling = false;
513
547
  this.cursorPosition = { x: 0, y: 0 };
514
548
  this.isCapturing = false;
515
549
  this.firstContentTimestamp = 0;
@@ -539,11 +573,12 @@ var ClipwiseRecorder = class {
539
573
  } else {
540
574
  this.lastFrameSignature = Buffer.from(signature);
541
575
  const captureTime = Date.now();
542
- this.rawFrames.push({ buffer, timestamp: captureTime, stepIndex: this.currentStepIndex });
576
+ const rawFrame = { buffer, timestamp: captureTime, stepIndex: this.currentStepIndex, isScrolling: this.isScrolling };
577
+ this.rawFrames.push(rawFrame);
543
578
  this.dedupStats.stored++;
544
579
  if (this.frameChannel && this.firstContentTimestamp > 0) {
545
580
  const frame = this.buildFrameOnline(
546
- { buffer, timestamp: captureTime, stepIndex: this.currentStepIndex },
581
+ rawFrame,
547
582
  this.channelIndex++
548
583
  );
549
584
  this.frameChannel.push(frame);
@@ -748,7 +783,8 @@ var ClipwiseRecorder = class {
748
783
  viewport: { ...this.viewport },
749
784
  deviceScaleFactor: this.deviceScaleFactor,
750
785
  stepIndex: raw.stepIndex,
751
- keystrokes: frameKeystrokes.length > 0 ? frameKeystrokes : void 0
786
+ keystrokes: frameKeystrokes.length > 0 ? frameKeystrokes : void 0,
787
+ isScrolling: raw.isScrolling || void 0
752
788
  };
753
789
  }
754
790
  /**
@@ -884,6 +920,7 @@ var ClipwiseRecorder = class {
884
920
  case "scroll": {
885
921
  const scrollTarget = action.selector ? await getElementCenter(this.page, action.selector, action.timeout) : null;
886
922
  const scrollDistance = Math.abs(action.y) + Math.abs(action.x);
923
+ this.isScrolling = true;
887
924
  if (action.smooth && scrollDistance > 0) {
888
925
  const scrollSteps = Math.max(12, Math.round(scrollDistance / 25));
889
926
  const yStep = action.y / scrollSteps;
@@ -923,6 +960,7 @@ var ClipwiseRecorder = class {
923
960
  timestamp: Date.now()
924
961
  });
925
962
  }
963
+ this.isScrolling = false;
926
964
  await this.waitWithRepaints(120);
927
965
  break;
928
966
  }
@@ -1088,8 +1126,9 @@ var ClipwiseRecorder = class {
1088
1126
  viewport: { ...this.viewport },
1089
1127
  deviceScaleFactor: this.deviceScaleFactor,
1090
1128
  keystrokes: frameKeystrokes.length > 0 ? frameKeystrokes : void 0,
1091
- stepIndex: raw.stepIndex
1129
+ stepIndex: raw.stepIndex,
1092
1130
  // use per-frame step index captured at event time
1131
+ isScrolling: raw.isScrolling || void 0
1093
1132
  };
1094
1133
  });
1095
1134
  }
@@ -1912,7 +1951,8 @@ async function composeFrame(frame, effects, output, context) {
1912
1951
  }
1913
1952
  const scale = ctx.zoomScale;
1914
1953
  if (effects.zoom.enabled && scale > 1) {
1915
- const rawFocus = frame.clickPosition ?? frame.cursorPosition ?? { x: frame.viewport.width / 2, y: frame.viewport.height / 2 };
1954
+ const followCursor = effects.zoom.autoZoom.followCursor;
1955
+ const rawFocus = followCursor ? frame.cursorPosition ?? frame.clickPosition ?? { x: frame.viewport.width / 2, y: frame.viewport.height / 2 } : frame.clickPosition ?? frame.cursorPosition ?? { x: frame.viewport.width / 2, y: frame.viewport.height / 2 };
1916
1956
  const offset = getFrameOffset(effects.deviceFrame, dpr);
1917
1957
  const focusPoint = {
1918
1958
  x: rawFocus.x * dpr + offset.left,
@@ -1980,37 +2020,170 @@ async function composeFrame(frame, effects, output, context) {
1980
2020
 
1981
2021
  // src/effects/transition.ts
1982
2022
  import sharp8 from "sharp";
2023
/**
 * Decode an image buffer into raw pixel data with an alpha channel ensured.
 *
 * @param buf          Encoded image bytes, or raw pixel bytes when `rawInfo` is supplied.
 * @param rawInfo      Optional `{ width, height, channels }` describing `buf` as raw pixels.
 * @param targetWidth  Optional output width. Resizing only happens when both
 *                     target dimensions are truthy.
 * @param targetHeight Optional output height.
 * @returns `{ data, width, height }`, where `data` is a Buffer copy of the decoded
 *          pixels and the dimensions are the ones reported by sharp.
 */
async function decodeToRaw(buf, rawInfo, targetWidth, targetHeight) {
  let image;
  if (rawInfo) {
    const { width, height, channels } = rawInfo;
    image = sharp8(buf, { raw: { width, height, channels } });
  } else {
    image = sharp8(buf);
  }
  if (targetWidth && targetHeight) {
    // "fill" stretches to exactly the requested size, ignoring aspect ratio.
    image = image.resize(targetWidth, targetHeight, { fit: "fill" });
  }
  const decoded = await image.ensureAlpha().raw().toBuffer({ resolveWithObject: true });
  return {
    data: Buffer.from(decoded.data),
    width: decoded.info.width,
    height: decoded.info.height
  };
}
2029
+ function returnRaw(buf, rawInfo, w, h) {
2030
+ if (rawInfo) return { buffer: buf, rawInfo };
2031
+ return null;
2032
+ }
1983
2033
  async function applyCrossfade(fromBuffer, toBuffer, progress, width, height, fromRawInfo, toRawInfo) {
1984
2034
  const t = Math.max(0, Math.min(1, progress));
1985
2035
  if (t <= 0) {
1986
- const rawInfo = fromRawInfo ?? { width, height, channels: 4 };
1987
- if (fromRawInfo) return { buffer: fromBuffer, rawInfo };
1988
- const { data, info } = await sharp8(fromBuffer).ensureAlpha().raw().toBuffer({ resolveWithObject: true });
1989
- return { buffer: Buffer.from(data), rawInfo: { width: info.width, height: info.height, channels: 4 } };
2036
+ const fast = returnRaw(fromBuffer, fromRawInfo, width, height);
2037
+ if (fast) return fast;
2038
+ const d = await decodeToRaw(fromBuffer, void 0);
2039
+ return { buffer: d.data, rawInfo: { width: d.width, height: d.height, channels: 4 } };
2040
+ }
2041
+ if (t >= 1) {
2042
+ const fast = returnRaw(toBuffer, toRawInfo, width, height);
2043
+ if (fast) return fast;
2044
+ const d = await decodeToRaw(toBuffer, void 0);
2045
+ return { buffer: d.data, rawInfo: { width: d.width, height: d.height, channels: 4 } };
2046
+ }
2047
+ const from = await decodeToRaw(fromBuffer, fromRawInfo);
2048
+ const to = await decodeToRaw(toBuffer, toRawInfo, from.width, from.height);
2049
+ const pixels = Buffer.alloc(from.data.length);
2050
+ for (let i = 0; i < from.data.length; i++) {
2051
+ pixels[i] = Math.round(from.data[i] * (1 - t) + to.data[i] * t);
2052
+ }
2053
+ return { buffer: pixels, rawInfo: { width: from.width, height: from.height, channels: 4 } };
2054
+ }
2055
/**
 * Render one frame of a slide transition between two images.
 *
 * The eased progress determines an offset `k` (in columns for "left", in rows
 * for any other direction). The output is composed from two contiguous regions:
 *
 *  - "left":  output columns [0, w-k) show `to` columns [k, w);
 *             output columns [w-k, w) show `from` columns [0, k).
 *  - other:   output rows [0, h-k) show `to` rows [k, h);
 *             output rows [h-k, h) show `from` rows [0, k).
 *
 * NOTE(review): with this mapping the picture moves rightward ("left") or
 * downward (other) as progress grows; confirm this is the intended meaning
 * of "slide-left" / "slide-up".
 *
 * @param fromBuffer  Outgoing frame (encoded, or raw when `fromRawInfo` is given).
 * @param toBuffer    Incoming frame (encoded, or raw when `toRawInfo` is given).
 * @param progress    Transition progress; clamped to [0, 1]. NaN is treated as 0.
 * @param width       Forwarded to `returnRaw` only.
 * @param height      Forwarded to `returnRaw` only.
 * @param direction   "left" for a horizontal slide; any other value slides vertically.
 * @param fromRawInfo Optional raw description of `fromBuffer`.
 * @param toRawInfo   Optional raw description of `toBuffer`.
 * @returns `{ buffer, rawInfo }` with 4-channel raw pixels, or the untouched
 *          endpoint buffer and its own raw info when progress is at 0 or 1 and
 *          that endpoint is already raw.
 */
async function applySlide(fromBuffer, toBuffer, progress, width, height, direction, fromRawInfo, toRawInfo) {
  // A NaN progress would otherwise skip both endpoint branches and yield garbage offsets.
  const t = Number.isNaN(progress) ? 0 : Math.max(0, Math.min(1, progress));

  // Endpoints: hand back the corresponding frame without compositing.
  if (t <= 0 || t >= 1) {
    const atStart = t <= 0;
    const endpointBuffer = atStart ? fromBuffer : toBuffer;
    const endpointRawInfo = atStart ? fromRawInfo : toRawInfo;
    const fast = returnRaw(endpointBuffer, endpointRawInfo, width, height);
    if (fast) return fast;
    const d = await decodeToRaw(endpointBuffer, void 0);
    return { buffer: d.data, rawInfo: { width: d.width, height: d.height, channels: 4 } };
  }

  const from = await decodeToRaw(fromBuffer, fromRawInfo);
  // The incoming frame is resized to the outgoing frame's dimensions so both share one layout.
  const to = await decodeToRaw(toBuffer, toRawInfo, from.width, from.height);
  const w = from.width;
  const h = from.height;
  const rowBytes = w * 4;
  const pixels = Buffer.alloc(from.data.length);
  const eased = easeInOutCubic3(t);

  if (direction === "left") {
    const offsetX = Math.round(w * (1 - eased));
    const offsetBytes = offsetX * 4;
    const toBytes = rowBytes - offsetBytes;
    for (let y = 0; y < h; y++) {
      const rowStart = y * rowBytes;
      if (toBytes > 0) {
        // Left part of the row: `to` columns [offsetX, w).
        to.data.copy(pixels, rowStart, rowStart + offsetBytes, rowStart + rowBytes);
      }
      if (offsetBytes > 0) {
        // Right part of the row: `from` columns [0, offsetX).
        from.data.copy(pixels, rowStart + toBytes, rowStart, rowStart + offsetBytes);
      }
    }
  } else {
    const offsetY = Math.round(h * (1 - eased));
    const offsetBytes = offsetY * rowBytes;
    const toBytes = h * rowBytes - offsetBytes;
    if (toBytes > 0) {
      // Top rows: `to` rows [offsetY, h). Rows are contiguous, so one copy suffices.
      to.data.copy(pixels, 0, offsetBytes, h * rowBytes);
    }
    if (offsetBytes > 0) {
      // Bottom rows: `from` rows [0, offsetY).
      from.data.copy(pixels, toBytes, 0, offsetBytes);
    }
  }
  return { buffer: pixels, rawInfo: { width: w, height: h, channels: 4 } };
}
2126
+ async function applyBlur(fromBuffer, toBuffer, progress, width, height, fromRawInfo, toRawInfo) {
2127
+ const t = Math.max(0, Math.min(1, progress));
2128
+ if (t <= 0) {
2129
+ const fast = returnRaw(fromBuffer, fromRawInfo, width, height);
2130
+ if (fast) return fast;
2131
+ const d = await decodeToRaw(fromBuffer, void 0);
2132
+ return { buffer: d.data, rawInfo: { width: d.width, height: d.height, channels: 4 } };
1990
2133
  }
1991
2134
  if (t >= 1) {
1992
- const rawInfo = toRawInfo ?? { width, height, channels: 4 };
1993
- if (toRawInfo) return { buffer: toBuffer, rawInfo };
1994
- const { data, info } = await sharp8(toBuffer).ensureAlpha().raw().toBuffer({ resolveWithObject: true });
1995
- return { buffer: Buffer.from(data), rawInfo: { width: info.width, height: info.height, channels: 4 } };
2135
+ const fast = returnRaw(toBuffer, toRawInfo, width, height);
2136
+ if (fast) return fast;
2137
+ const d = await decodeToRaw(toBuffer, void 0);
2138
+ return { buffer: d.data, rawInfo: { width: d.width, height: d.height, channels: 4 } };
1996
2139
  }
2140
+ const sigma = t * 20;
1997
2141
  const fromSrc = fromRawInfo ? sharp8(fromBuffer, { raw: { width: fromRawInfo.width, height: fromRawInfo.height, channels: fromRawInfo.channels } }) : sharp8(fromBuffer);
1998
- const fromRaw = await fromSrc.ensureAlpha().raw().toBuffer({ resolveWithObject: true });
1999
- const toSrc = toRawInfo ? sharp8(toBuffer, { raw: { width: toRawInfo.width, height: toRawInfo.height, channels: toRawInfo.channels } }) : sharp8(toBuffer);
2000
- const toRaw = await toSrc.resize(fromRaw.info.width, fromRaw.info.height, { fit: "fill" }).ensureAlpha().raw().toBuffer({ resolveWithObject: true });
2001
- const pixels = Buffer.alloc(fromRaw.data.length);
2002
- for (let i = 0; i < fromRaw.data.length; i++) {
2003
- pixels[i] = Math.round(
2004
- fromRaw.data[i] * (1 - t) + toRaw.data[i] * t
2005
- );
2142
+ const blurredFrom = await fromSrc.blur(Math.max(0.3, sigma)).ensureAlpha().raw().toBuffer({ resolveWithObject: true });
2143
+ const to = await decodeToRaw(toBuffer, toRawInfo, blurredFrom.info.width, blurredFrom.info.height);
2144
+ const pixels = Buffer.alloc(blurredFrom.data.length);
2145
+ for (let i = 0; i < blurredFrom.data.length; i++) {
2146
+ pixels[i] = Math.round(blurredFrom.data[i] * (1 - t) + to.data[i] * t);
2006
2147
  }
2007
2148
  return {
2008
2149
  buffer: pixels,
2009
- rawInfo: { width: fromRaw.info.width, height: fromRaw.info.height, channels: 4 }
2150
+ rawInfo: { width: blurredFrom.info.width, height: blurredFrom.info.height, channels: 4 }
2010
2151
  };
2011
2152
  }
2153
/**
 * Dispatch to the renderer for the requested transition type.
 *
 * @param type        One of "fade", "slide-left", "slide-up", "blur" or "none".
 *                    Any unrecognised value is handled like "none".
 * @param fromBuffer  Outgoing frame.
 * @param toBuffer    Incoming frame.
 * @param progress    Transition progress, passed through to the renderer.
 * @param width       Passed through to the renderer.
 * @param height      Passed through to the renderer.
 * @param fromRawInfo Optional raw description of `fromBuffer`.
 * @param toRawInfo   Optional raw description of `toBuffer`.
 * @returns `{ buffer, rawInfo }` for the blended frame. For "none" (and unknown
 *          types) this is the incoming frame decoded to 4-channel raw pixels,
 *          regardless of `progress`.
 */
async function applyTransition(type, fromBuffer, toBuffer, progress, width, height, fromRawInfo, toRawInfo) {
  if (type === "fade") {
    return applyCrossfade(fromBuffer, toBuffer, progress, width, height, fromRawInfo, toRawInfo);
  }
  if (type === "slide-left" || type === "slide-up") {
    const direction = type === "slide-left" ? "left" : "up";
    return applySlide(fromBuffer, toBuffer, progress, width, height, direction, fromRawInfo, toRawInfo);
  }
  if (type === "blur") {
    return applyBlur(fromBuffer, toBuffer, progress, width, height, fromRawInfo, toRawInfo);
  }
  // "none" and anything unrecognised: no blending, just the incoming frame.
  const decoded = await decodeToRaw(toBuffer, toRawInfo);
  return {
    buffer: decoded.data,
    rawInfo: { width: decoded.width, height: decoded.height, channels: 4 }
  };
}
2169
/**
 * Cubic ease-in-out curve.
 *
 * @param t Progress value; the callers visible here pass values in [0, 1].
 * @returns `4t³` for `t < 0.5`, otherwise `1 - (2 - 2t)³ / 2`.
 */
function easeInOutCubic3(t) {
  if (t < 0.5) {
    return 4 * t * t * t;
  }
  const remaining = -2 * t + 2;
  return 1 - Math.pow(remaining, 3) / 2;
}
2012
2172
 
2013
2173
  // src/compose/canvas-renderer.ts
2174
+ function mergeStepEffects(global, stepIndex, steps) {
2175
+ if (stepIndex === void 0 || !steps[stepIndex]?.effects) return global;
2176
+ const override = steps[stepIndex].effects;
2177
+ return {
2178
+ zoom: override.zoom ? { ...global.zoom, ...override.zoom } : global.zoom,
2179
+ cursor: override.cursor ? { ...global.cursor, ...override.cursor } : global.cursor,
2180
+ background: override.background ? { ...global.background, ...override.background } : global.background,
2181
+ deviceFrame: override.deviceFrame ? { ...global.deviceFrame, ...override.deviceFrame } : global.deviceFrame,
2182
+ speedRamp: override.speedRamp ? { ...global.speedRamp, ...override.speedRamp } : global.speedRamp,
2183
+ keystroke: override.keystroke ? { ...global.keystroke, ...override.keystroke } : global.keystroke,
2184
+ watermark: override.watermark ? { ...global.watermark, ...override.watermark } : global.watermark
2185
+ };
2186
+ }
2014
2187
  var MIN_FRAMES_PER_WORKER = 4;
2015
2188
  var cachedWorkerUrl = null;
2016
2189
  function getWorkerUrl() {
@@ -2066,14 +2239,17 @@ var CanvasRenderer = class {
2066
2239
  const cpuCount = os.cpus().length;
2067
2240
  const workerCount = Math.min(cpuCount, 8);
2068
2241
  const useWorkers = workerCount >= 2 && processFrames.length >= workerCount * MIN_FRAMES_PER_WORKER;
2242
+ const perFrameEffects = processFrames.map(
2243
+ (f) => mergeStepEffects(this.effects, f.stepIndex, this.steps)
2244
+ );
2069
2245
  let composed;
2070
2246
  if (useWorkers) {
2071
- composed = await this.processWithWorkers(processFrames, contexts, workerCount);
2247
+ composed = await this.processWithWorkers(processFrames, contexts, workerCount, perFrameEffects);
2072
2248
  } else {
2073
2249
  composed = [];
2074
2250
  for (let i = 0; i < processFrames.length; i++) {
2075
2251
  composed.push(
2076
- await composeFrame(processFrames[i], this.effects, this.output, contexts[i])
2252
+ await composeFrame(processFrames[i], perFrameEffects[i], this.output, contexts[i])
2077
2253
  );
2078
2254
  }
2079
2255
  }
@@ -2086,7 +2262,7 @@ var CanvasRenderer = class {
2086
2262
  * Distribute frame composition across a pool of worker threads.
2087
2263
  * Workers process frames concurrently; results are collected in order.
2088
2264
  */
2089
- processWithWorkers(frames, contexts, workerCount) {
2265
+ processWithWorkers(frames, contexts, workerCount, perFrameEffects) {
2090
2266
  return new Promise((resolve2, reject) => {
2091
2267
  const results = new Array(frames.length);
2092
2268
  let completed = 0;
@@ -2100,7 +2276,7 @@ var CanvasRenderer = class {
2100
2276
  worker.postMessage({
2101
2277
  taskId: i,
2102
2278
  frame: frames[i],
2103
- effects: this.effects,
2279
+ effects: perFrameEffects ? perFrameEffects[i] : this.effects,
2104
2280
  output: this.output,
2105
2281
  context: contexts[i]
2106
2282
  });
@@ -2163,6 +2339,9 @@ var CanvasRenderer = class {
2163
2339
  effectiveScale,
2164
2340
  transitionFrames
2165
2341
  );
2342
+ if (frame.isScrolling && zoomScale > 1) {
2343
+ zoomScale = 1;
2344
+ }
2166
2345
  }
2167
2346
  const clickProgress = frame.clickPosition != null ? frame.clickProgress ?? 0.5 : null;
2168
2347
  const trailLength = this.effects.cursor.trailLength;
@@ -2237,7 +2416,7 @@ var CanvasRenderer = class {
2237
2416
  * using the same applyTransitionsToStream() logic as composeStream().
2238
2417
  */
2239
2418
  async *composeStreamOnline(source) {
2240
- const hasFadeTransitions = this.steps.some((s) => s.transition === "fade");
2419
+ const hasFadeTransitions = this.steps.some((s) => s.transition !== "none");
2241
2420
  if (!hasFadeTransitions) {
2242
2421
  const cpuCount = os.cpus().length;
2243
2422
  const workerCount = Math.min(cpuCount, 8);
@@ -2293,6 +2472,9 @@ var CanvasRenderer = class {
2293
2472
  effectiveScale,
2294
2473
  transitionFrames
2295
2474
  );
2475
+ if (frame.isScrolling && zoomScale > 1) {
2476
+ zoomScale = 1;
2477
+ }
2296
2478
  }
2297
2479
  const clickProgress = frame.clickPosition != null ? frame.clickProgress ?? 0.5 : null;
2298
2480
  const trail = [];
@@ -2304,10 +2486,11 @@ var CanvasRenderer = class {
2304
2486
  const dispatch = (worker) => {
2305
2487
  if (canDispatch(nextToDispatch)) {
2306
2488
  const i = nextToDispatch++;
2489
+ const frameEffects = mergeStepEffects(this.effects, frames[i].stepIndex, this.steps);
2307
2490
  worker.postMessage({
2308
2491
  taskId: i,
2309
2492
  frame: frames[i],
2310
- effects: this.effects,
2493
+ effects: frameEffects,
2311
2494
  output: this.output,
2312
2495
  context: computeContext(i)
2313
2496
  });
@@ -2417,13 +2600,16 @@ var CanvasRenderer = class {
2417
2600
  const workerUrl = getWorkerUrl();
2418
2601
  const workers = [];
2419
2602
  let nextToDispatch = 0;
2603
+ const perFrameEffects = frames.map(
2604
+ (f) => mergeStepEffects(this.effects, f.stepIndex, this.steps)
2605
+ );
2420
2606
  const dispatch = (worker) => {
2421
2607
  if (nextToDispatch >= frames.length || workerError) return;
2422
2608
  const i = nextToDispatch++;
2423
2609
  worker.postMessage({
2424
2610
  taskId: i,
2425
2611
  frame: frames[i],
2426
- effects: this.effects,
2612
+ effects: perFrameEffects[i],
2427
2613
  output: this.output,
2428
2614
  context: contexts[i]
2429
2615
  });
@@ -2474,7 +2660,8 @@ var CanvasRenderer = class {
2474
2660
  */
2475
2661
  async *streamSequential(frames, contexts) {
2476
2662
  for (let i = 0; i < frames.length; i++) {
2477
- yield await composeFrame(frames[i], this.effects, this.output, contexts[i]);
2663
+ const frameEffects = mergeStepEffects(this.effects, frames[i].stepIndex, this.steps);
2664
+ yield await composeFrame(frames[i], frameEffects, this.output, contexts[i]);
2478
2665
  }
2479
2666
  }
2480
2667
  /**
@@ -2489,11 +2676,11 @@ var CanvasRenderer = class {
2489
2676
  if (frames[i].stepIndex !== void 0 && frames[i - 1].stepIndex !== void 0 && frames[i].stepIndex !== frames[i - 1].stepIndex) {
2490
2677
  const stepIdx = frames[i].stepIndex;
2491
2678
  const step = this.steps[stepIdx];
2492
- if (step && step.transition === "fade") {
2679
+ if (step && step.transition !== "none") {
2493
2680
  const startIdx = Math.max(0, i - Math.floor(transitionFrames / 2));
2494
2681
  const endIdx = Math.min(frames.length - 1, i + Math.ceil(transitionFrames / 2));
2495
2682
  if (endIdx - startIdx >= 2) {
2496
- windows.push({ startIdx, endIdx });
2683
+ windows.push({ startIdx, endIdx, type: step.transition });
2497
2684
  }
2498
2685
  }
2499
2686
  }
@@ -2540,10 +2727,12 @@ var CanvasRenderer = class {
2540
2727
  const fromBuf = state.frames[0].buffer;
2541
2728
  const toBuf = state.frames[state.frames.length - 1].buffer;
2542
2729
  const range = state.frames.length - 1;
2730
+ const transType = win.type;
2543
2731
  const fromRawInfo = state.frames[0].rawInfo;
2544
2732
  const toRawInfo = state.frames[state.frames.length - 1].rawInfo;
2545
2733
  for (let j = 1; j < state.frames.length - 1; j++) {
2546
- const blended = await applyCrossfade(
2734
+ const blended = await applyTransition(
2735
+ transType,
2547
2736
  fromBuf,
2548
2737
  toBuf,
2549
2738
  j / range,
@@ -2585,8 +2774,8 @@ var CanvasRenderer = class {
2585
2774
  if (frames[i].stepIndex !== void 0 && frames[i - 1].stepIndex !== void 0 && frames[i].stepIndex !== frames[i - 1].stepIndex) {
2586
2775
  const stepIdx = frames[i].stepIndex;
2587
2776
  const step = this.steps[stepIdx];
2588
- if (step && step.transition === "fade") {
2589
- boundaries.push({ index: i, stepIndex: stepIdx });
2777
+ if (step && step.transition !== "none") {
2778
+ boundaries.push({ index: i, stepIndex: stepIdx, type: step.transition });
2590
2779
  }
2591
2780
  }
2592
2781
  }
@@ -2601,7 +2790,8 @@ var CanvasRenderer = class {
2601
2790
  const toRawInfo = composed[endIdx].rawInfo;
2602
2791
  for (let i = startIdx + 1; i < endIdx; i++) {
2603
2792
  const progress = (i - startIdx) / range;
2604
- const blended = await applyCrossfade(
2793
+ const blended = await applyTransition(
2794
+ boundary.type,
2605
2795
  fromBuffer,
2606
2796
  toBuffer,
2607
2797
  progress,
@@ -2678,19 +2868,19 @@ async function encodeGif(frames, config) {
2678
2868
  gif.finish();
2679
2869
  return Buffer.from(gif.bytes());
2680
2870
  }
2681
- async function encodeMp4Stream(frames, config) {
2871
+ async function encodeMp4Stream(frames, config, audio) {
2682
2872
  const outputPath = join(tmpdir(), `clipwise-${Date.now()}-${Math.random().toString(36).slice(2)}.mp4`);
2683
2873
  try {
2684
2874
  const encoder = await detectVideoEncoder();
2685
2875
  const params = resolveEncodingParams(config);
2686
- await pipeStreamToFfmpeg(frames, config, params, encoder, outputPath);
2876
+ await pipeStreamToFfmpeg(frames, config, params, encoder, outputPath, audio);
2687
2877
  return await readFile2(outputPath);
2688
2878
  } finally {
2689
2879
  await rm(outputPath, { force: true }).catch(() => {
2690
2880
  });
2691
2881
  }
2692
2882
  }
2693
- async function pipeStreamToFfmpeg(frames, config, params, encoder, outputPath) {
2883
+ async function pipeStreamToFfmpeg(frames, config, params, encoder, outputPath, audio) {
2694
2884
  const videoArgs = encoder === "hevc_videotoolbox" ? [
2695
2885
  "-c:v",
2696
2886
  "hevc_videotoolbox",
@@ -2723,6 +2913,14 @@ async function pipeStreamToFfmpeg(frames, config, params, encoder, outputPath) {
2723
2913
  "-pix_fmt",
2724
2914
  "yuv420p"
2725
2915
  ];
2916
+ const audioInputArgs = audio ? ["-i", audio.file] : ["-f", "lavfi", "-i", "anullsrc=r=48000:cl=stereo"];
2917
+ const audioFilters = [];
2918
+ if (audio) {
2919
+ if (audio.volume !== 1) audioFilters.push(`volume=${audio.volume}`);
2920
+ if (audio.fadeIn > 0) audioFilters.push(`afade=t=in:d=${audio.fadeIn / 1e3}`);
2921
+ if (audio.fadeOut > 0) audioFilters.push(`afade=t=out:st=999999:d=${audio.fadeOut / 1e3}`);
2922
+ }
2923
+ const audioFilterArgs = audioFilters.length > 0 ? ["-af", audioFilters.join(",")] : [];
2726
2924
  return new Promise((resolve2, reject) => {
2727
2925
  const ffmpeg = spawn(
2728
2926
  "ffmpeg",
@@ -2738,15 +2936,13 @@ async function pipeStreamToFfmpeg(frames, config, params, encoder, outputPath) {
2738
2936
  String(config.fps),
2739
2937
  "-i",
2740
2938
  "pipe:0",
2741
- "-f",
2742
- "lavfi",
2743
- "-i",
2744
- "anullsrc=r=48000:cl=stereo",
2939
+ ...audioInputArgs,
2745
2940
  ...videoArgs,
2746
2941
  "-c:a",
2747
2942
  "aac",
2748
2943
  "-b:a",
2749
2944
  "128k",
2945
+ ...audioFilterArgs,
2750
2946
  "-shortest",
2751
2947
  "-movflags",
2752
2948
  "+faststart",
@@ -2835,7 +3031,8 @@ var ConcurrentSession = class extends EventEmitter {
2835
3031
  yield frame;
2836
3032
  }
2837
3033
  })(),
2838
- this.scenario.output
3034
+ this.scenario.output,
3035
+ this.scenario.audio
2839
3036
  );
2840
3037
  const session = await handle.done;
2841
3038
  this.emit("progress", { composed, total: composed, pct: 100 });
@@ -2875,7 +3072,8 @@ var StreamingSession = class extends EventEmitter {
2875
3072
  yield frame;
2876
3073
  }
2877
3074
  })(),
2878
- scenario.output
3075
+ scenario.output,
3076
+ scenario.audio
2879
3077
  );
2880
3078
  }
2881
3079
  };
@@ -2888,7 +3086,7 @@ import { homedir } from "os";
2888
3086
  var program = new Command();
2889
3087
  program.name("clipwise").description(
2890
3088
  "Playwright-based cinematic screen recorder for product demos"
2891
- ).version("0.1.0");
3089
+ ).version("0.6.0");
2892
3090
  program.command("record").description("Record a demo from a YAML scenario file").argument("<scenario>", "Path to YAML scenario file").option("-o, --output <dir>", "Output directory", "./output").option(
2893
3091
  "-f, --format <format>",
2894
3092
  "Output format (gif|mp4|png-sequence)"
@@ -3007,7 +3205,8 @@ program.command("record").description("Record a demo from a YAML scenario file")
3007
3205
  }
3008
3206
  await writeFile2(outputPath, mp4Buffer);
3009
3207
  const sizeMB = (mp4Buffer.length / (1024 * 1024)).toFixed(2);
3010
- spinner.succeed(`MP4 saved to ${chalk.bold(outputPath)} (${sizeMB} MB)`);
3208
+ const audioMsg = scenario.audio ? ` + audio: ${scenario.audio.file}` : "";
3209
+ spinner.succeed(`MP4 saved to ${chalk.bold(outputPath)} (${sizeMB} MB${audioMsg})`);
3011
3210
  } else {
3012
3211
  let composedFrames;
3013
3212
  if (options.effects !== false) {
@@ -3150,7 +3349,7 @@ program.command("demo").description("Record a demo video of the Clipwise showcas
3150
3349
  ).action(async (options) => {
3151
3350
  const spinner = ora();
3152
3351
  try {
3153
- const demoUrl = options.url ?? "https://kwakseongjae.github.io/clipwise/";
3352
+ const demoUrl = options.url ?? "https://kwakseongjae.github.io/clipwise/demo/";
3154
3353
  const device = options.device;
3155
3354
  const isMobile = device === "iphone" || device === "android";
3156
3355
  const isTablet = device === "ipad";
@@ -3165,7 +3364,10 @@ program.command("demo").description("Record a demo video of the Clipwise showcas
3165
3364
  name: "Load dashboard",
3166
3365
  captureDelay: 100,
3167
3366
  holdDuration: 1e3,
3168
- actions: [{ action: "navigate", url: demoUrl, waitUntil: "load" }]
3367
+ actions: [
3368
+ { action: "navigate", url: demoUrl, waitUntil: "load" },
3369
+ { action: "waitForSelector", selector: "#stat-users", state: "visible", timeout: 15e3 }
3370
+ ]
3169
3371
  },
3170
3372
  {
3171
3373
  name: "Hover Users stat",
@@ -704,7 +704,8 @@ async function composeFrame(frame, effects, output, context) {
704
704
  }
705
705
  const scale = ctx.zoomScale;
706
706
  if (effects.zoom.enabled && scale > 1) {
707
- const rawFocus = frame.clickPosition ?? frame.cursorPosition ?? { x: frame.viewport.width / 2, y: frame.viewport.height / 2 };
707
+ const followCursor = effects.zoom.autoZoom.followCursor;
708
+ const rawFocus = followCursor ? frame.cursorPosition ?? frame.clickPosition ?? { x: frame.viewport.width / 2, y: frame.viewport.height / 2 } : frame.clickPosition ?? frame.cursorPosition ?? { x: frame.viewport.width / 2, y: frame.viewport.height / 2 };
708
709
  const offset = getFrameOffset(effects.deviceFrame, dpr);
709
710
  const focusPoint = {
710
711
  x: rawFocus.x * dpr + offset.left,