visual-ai-assertions 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -119,16 +119,24 @@ declare const StatementResultSchema: z.ZodObject<{
119
119
  pass: z.ZodBoolean;
120
120
  reasoning: z.ZodString;
121
121
  confidence: z.ZodOptional<z.ZodEnum<["high", "medium", "low"]>>;
122
+ /**
123
+ * For video inputs, the approximate timestamp (in seconds, from the start of the clip)
124
+ * of the frame that most clearly demonstrates the statement. `null` when the statement
125
+ * fails or applies across the whole clip. Always omitted for image inputs.
126
+ */
127
+ timestampSeconds: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
122
128
  }, "strip", z.ZodTypeAny, {
123
129
  statement: string;
124
130
  pass: boolean;
125
131
  reasoning: string;
126
132
  confidence?: "high" | "medium" | "low" | undefined;
133
+ timestampSeconds?: number | null | undefined;
127
134
  }, {
128
135
  statement: string;
129
136
  pass: boolean;
130
137
  reasoning: string;
131
138
  confidence?: "high" | "medium" | "low" | undefined;
139
+ timestampSeconds?: number | null | undefined;
132
140
  }>;
133
141
  /** Outcome of a single statement evaluated by `check()`. */
134
142
  type StatementResult = z.infer<typeof StatementResultSchema>;
@@ -155,7 +163,14 @@ declare const UsageInfoSchema: z.ZodObject<{
155
163
  }>;
156
164
  /** Token usage and optional cost/latency metadata for a provider call. */
157
165
  type UsageInfo = z.infer<typeof UsageInfoSchema>;
158
- /** Zod schema for results returned by `check()` and template helpers. */
166
+ /**
167
+ * Zod schema for results returned by `check()` and template helpers.
168
+ *
169
+ * Note: the runtime `CheckResult` TypeScript type extends this schema with
170
+ * an optional `frames` field that is populated client-side for video inputs.
171
+ * Parsing a stored `CheckResult` through this schema will silently drop
172
+ * `frames` because the schema only describes what the model returns.
173
+ */
159
174
  declare const CheckResultSchema: z.ZodObject<{
160
175
  pass: z.ZodBoolean;
161
176
  reasoning: z.ZodString;
@@ -201,16 +216,24 @@ declare const CheckResultSchema: z.ZodObject<{
201
216
  pass: z.ZodBoolean;
202
217
  reasoning: z.ZodString;
203
218
  confidence: z.ZodOptional<z.ZodEnum<["high", "medium", "low"]>>;
219
+ /**
220
+ * For video inputs, the approximate timestamp (in seconds, from the start of the clip)
221
+ * of the frame that most clearly demonstrates the statement. `null` when the statement
222
+ * fails or applies across the whole clip. Always omitted for image inputs.
223
+ */
224
+ timestampSeconds: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
204
225
  }, "strip", z.ZodTypeAny, {
205
226
  statement: string;
206
227
  pass: boolean;
207
228
  reasoning: string;
208
229
  confidence?: "high" | "medium" | "low" | undefined;
230
+ timestampSeconds?: number | null | undefined;
209
231
  }, {
210
232
  statement: string;
211
233
  pass: boolean;
212
234
  reasoning: string;
213
235
  confidence?: "high" | "medium" | "low" | undefined;
236
+ timestampSeconds?: number | null | undefined;
214
237
  }>, "many">;
215
238
  }, "strip", z.ZodTypeAny, {
216
239
  issues: {
@@ -226,6 +249,7 @@ declare const CheckResultSchema: z.ZodObject<{
226
249
  pass: boolean;
227
250
  reasoning: string;
228
251
  confidence?: "high" | "medium" | "low" | undefined;
252
+ timestampSeconds?: number | null | undefined;
229
253
  }[];
230
254
  usage?: {
231
255
  inputTokens: number;
@@ -248,6 +272,7 @@ declare const CheckResultSchema: z.ZodObject<{
248
272
  pass: boolean;
249
273
  reasoning: string;
250
274
  confidence?: "high" | "medium" | "low" | undefined;
275
+ timestampSeconds?: number | null | undefined;
251
276
  }[];
252
277
  usage?: {
253
278
  inputTokens: number;
@@ -257,8 +282,23 @@ declare const CheckResultSchema: z.ZodObject<{
257
282
  durationSeconds?: number | undefined;
258
283
  } | undefined;
259
284
  }>;
285
+ /**
286
+ * Metadata describing the sampled-frame timeline used when the input was a video.
287
+ * Populated client-side; not part of the model's response.
288
+ */
289
+ interface VideoFramesMetadata {
290
+ /** Total number of frames sampled from the video. */
291
+ count: number;
292
+ /** Timestamp (seconds, from the start of the clip) of each sampled frame, in order. */
293
+ timestampsSeconds: number[];
294
+ /** Total duration of the source video in seconds. */
295
+ durationSeconds: number;
296
+ }
260
297
  /** Result returned by `check()` and the template convenience methods. */
261
- type CheckResult = z.infer<typeof CheckResultSchema>;
298
+ type CheckResult = z.infer<typeof CheckResultSchema> & {
299
+ /** Present only when the input was a video. Describes which frames the model saw. */
300
+ frames?: VideoFramesMetadata;
301
+ };
262
302
  /** Zod schema for an individual visual change reported by `compare()`. */
263
303
  declare const ChangeEntrySchema: z.ZodObject<{
264
304
  description: z.ZodString;
@@ -340,7 +380,14 @@ declare const CompareResultSchema: z.ZodObject<{
340
380
  type CompareResult = z.infer<typeof CompareResultSchema> & {
341
381
  diffImage?: DiffImageResult;
342
382
  };
343
- /** Zod schema for results returned by `ask()`. */
383
+ /**
384
+ * Zod schema for results returned by `ask()`.
385
+ *
386
+ * Note: the runtime `AskResult` TypeScript type extends this schema with an
387
+ * optional `frames` field that is populated client-side for video inputs.
388
+ * Parsing a stored `AskResult` through this schema will silently drop
389
+ * `frames` because the schema only describes what the model returns.
390
+ */
344
391
  declare const AskResultSchema: z.ZodObject<{
345
392
  summary: z.ZodString;
346
393
  issues: z.ZodArray<z.ZodObject<{
@@ -359,6 +406,11 @@ declare const AskResultSchema: z.ZodObject<{
359
406
  description: string;
360
407
  suggestion: string;
361
408
  }>, "many">;
409
+ /**
410
+ * For video inputs, the indices of frames the model relied on to answer.
411
+ * Indices are 0-based and refer to entries in `frames.timestampsSeconds`.
412
+ */
413
+ frameReferences: z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>;
362
414
  usage: z.ZodOptional<z.ZodObject<{
363
415
  inputTokens: z.ZodNumber;
364
416
  outputTokens: z.ZodNumber;
@@ -394,6 +446,7 @@ declare const AskResultSchema: z.ZodObject<{
394
446
  estimatedCost?: number | undefined;
395
447
  durationSeconds?: number | undefined;
396
448
  } | undefined;
449
+ frameReferences?: number[] | undefined;
397
450
  }, {
398
451
  issues: {
399
452
  priority: "critical" | "major" | "minor";
@@ -409,13 +462,25 @@ declare const AskResultSchema: z.ZodObject<{
409
462
  estimatedCost?: number | undefined;
410
463
  durationSeconds?: number | undefined;
411
464
  } | undefined;
465
+ frameReferences?: number[] | undefined;
412
466
  }>;
413
467
  /** Result returned by `ask()`. */
414
- type AskResult = z.infer<typeof AskResultSchema>;
468
+ type AskResult = z.infer<typeof AskResultSchema> & {
469
+ /** Present only when the input was a video. Describes which frames the model saw. */
470
+ frames?: VideoFramesMetadata;
471
+ };
415
472
  /** Supported input shapes for image arguments accepted by the client. */
416
473
  type ImageInput = Buffer | Uint8Array | string;
474
+ /**
475
+ * Supported input shapes for media arguments accepted by the client.
476
+ * Identical to `ImageInput` today — the client auto-detects whether the bytes are
477
+ * an image or a video.
478
+ */
479
+ type MediaInput = ImageInput;
417
480
  /** Supported image MIME types accepted by all providers. */
418
481
  type SupportedMimeType = "image/jpeg" | "image/png" | "image/webp" | "image/gif";
482
+ /** Supported video MIME types the client can accept and sample frames from. */
483
+ type SupportedVideoMimeType = "video/mp4" | "video/webm" | "video/quicktime" | "video/x-matroska";
419
484
  /** Supported provider identifiers. */
420
485
  type ProviderName = "anthropic" | "openai" | "google";
421
486
  /**
@@ -445,10 +510,20 @@ interface VisualAIConfig {
445
510
  /** Optional instructions for `check()`. */
446
511
  interface CheckOptions {
447
512
  instructions?: readonly string[];
513
+ /**
514
+ * Frame-sampling configuration applied when the input is a video.
515
+ * Ignored for image inputs. See `VideoSamplingOptions` for defaults.
516
+ */
517
+ video?: VideoSamplingOptions;
448
518
  }
449
519
  /** Optional instructions for `ask()`. */
450
520
  interface AskOptions {
451
521
  instructions?: readonly string[];
522
+ /**
523
+ * Frame-sampling configuration applied when the input is a video.
524
+ * Ignored for image inputs. See `VideoSamplingOptions` for defaults.
525
+ */
526
+ video?: VideoSamplingOptions;
452
527
  }
453
528
  /** Metadata and binary content for an AI-generated diff image. */
454
529
  interface DiffImageResult {
@@ -487,6 +562,42 @@ interface ContentOptions {
487
562
  checks?: ContentCheckName[];
488
563
  instructions?: readonly string[];
489
564
  }
565
+ /** Internal normalized image representation passed to provider drivers. */
566
+ interface NormalizedImage {
567
+ readonly data: Buffer;
568
+ readonly mimeType: SupportedMimeType;
569
+ readonly base64: string;
570
+ }
571
+ /**
572
+ * Options for sampling frames from a video input. Defaults match the v1
573
+ * sampling strategy: 1 fps, capped at 10 frames, max duration 10 s.
574
+ */
575
+ interface VideoSamplingOptions {
576
+ /** Sampling rate in frames per second. Default `1`. */
577
+ fps?: number;
578
+ /**
579
+ * Maximum number of frames extracted regardless of duration. Default `10`.
580
+ * Hard-capped at `60` to keep memory bounded; values above the cap throw
581
+ * `VisualAIVideoError`.
582
+ */
583
+ maxFrames?: number;
584
+ /**
585
+ * Maximum video duration accepted, in seconds. Videos longer than this
586
+ * cause `VisualAIVideoError` to be thrown before any provider call.
587
+ * Default `10`.
588
+ */
589
+ maxDurationSeconds?: number;
590
+ }
591
+ /**
592
+ * A single frame extracted from a video input. Extends
593
+ * `NormalizedImage` so it can be passed transparently to provider drivers.
594
+ */
595
+ interface Frame extends NormalizedImage {
596
+ /** Timestamp (seconds, from the start of the clip) of this frame. */
597
+ readonly timestampSeconds: number;
598
+ /** 0-based index of this frame within the sampled sequence. */
599
+ readonly index: number;
600
+ }
490
601
 
491
602
  /**
492
603
  * High-level client for running visual checks against screenshots or other images.
@@ -499,14 +610,22 @@ interface ContentOptions {
499
610
  */
500
611
  interface VisualAIClient {
501
612
  /**
502
- * Verifies one or more statements against a single image.
613
+ * Verifies one or more statements against a single image or video.
503
614
  *
504
- * @param image Image source as a buffer, URL, file path, or base64 string.
505
- * @param statements One or more statements to validate against the image.
506
- * @param options Optional additional instructions appended to the prompt.
615
+ * Pass an image (PNG/JPEG/WebP/GIF) for a single-frame check. Pass a video
616
+ * (MP4/WebM/MOV/MKV file path, URL, base64, Buffer) and the client samples
617
+ * frames automatically; statements pass if they are true at any sampled
618
+ * frame, and each statement result includes the timestamp where it
619
+ * matched. The `frames` metadata on the result reports which timestamps
620
+ * the model saw.
621
+ *
622
+ * @param input Image or video source as a buffer, URL, file path, or base64 string.
623
+ * @param statements One or more statements to validate against the input.
624
+ * @param options Optional additional instructions and video sampling overrides.
507
625
  * @returns A structured result describing pass/fail, issues, and statement reasoning.
508
626
  * @throws {VisualAIConfigError} When no statements are provided.
509
- * @throws {VisualAIImageError} When the image cannot be loaded or decoded.
627
+ * @throws {VisualAIImageError} When an image input cannot be loaded or decoded.
628
+ * @throws {VisualAIVideoError} When a video input cannot be loaded, exceeds the duration cap, or ffmpeg is missing.
510
629
  * @throws {VisualAIError} When the provider rejects the request or returns invalid output.
511
630
  * @example
512
631
  * ```ts
@@ -515,23 +634,35 @@ interface VisualAIClient {
515
634
  * "There is no error banner",
516
635
  * ]);
517
636
  * ```
637
+ * @example
638
+ * ```ts
639
+ * const result = await client.check("./recording.webm", [
640
+ * 'A success toast with text "Saved" briefly appears',
641
+ * ]);
642
+ * console.log(result.statements[0].timestampSeconds); // e.g. 3.5
643
+ * ```
518
644
  */
519
- check(image: ImageInput, statements: string | string[], options?: CheckOptions): Promise<CheckResult>;
645
+ check(input: MediaInput, statements: string | string[], options?: CheckOptions): Promise<CheckResult>;
520
646
  /**
521
- * Asks an open-ended question about an image and returns a structured summary.
647
+ * Asks an open-ended question about an image or video and returns a structured summary.
522
648
  *
523
- * @param image Image source as a buffer, URL, file path, or base64 string.
524
- * @param prompt Prompt describing what to inspect in the image.
525
- * @param options Optional additional instructions appended to the prompt.
649
+ * Video inputs are sampled into frames and analyzed as a chronological
650
+ * timeline. The result's `frameReferences` array surfaces which frames the
651
+ * model relied on for its answer.
652
+ *
653
+ * @param input Image or video source as a buffer, URL, file path, or base64 string.
654
+ * @param prompt Prompt describing what to inspect in the input.
655
+ * @param options Optional additional instructions and video sampling overrides.
526
656
  * @returns A summary with any detected issues.
527
- * @throws {VisualAIImageError} When the image cannot be loaded or decoded.
657
+ * @throws {VisualAIImageError} When an image input cannot be loaded or decoded.
658
+ * @throws {VisualAIVideoError} When a video input cannot be loaded, exceeds the duration cap, or ffmpeg is missing.
528
659
  * @throws {VisualAIError} When the provider rejects the request or returns invalid output.
529
660
  * @example
530
661
  * ```ts
531
662
  * const result = await client.ask(screenshot, "What looks visually broken on this page?");
532
663
  * ```
533
664
  */
534
- ask(image: ImageInput, prompt: string, options?: AskOptions): Promise<AskResult>;
665
+ ask(input: MediaInput, prompt: string, options?: AskOptions): Promise<AskResult>;
535
666
  /**
536
667
  * Compares two images and reports meaningful visual differences.
537
668
  *
@@ -552,7 +683,8 @@ interface VisualAIClient {
552
683
  */
553
684
  compare(imageA: ImageInput, imageB: ImageInput, options?: CompareOptions): Promise<CompareResult>;
554
685
  /**
555
- * Checks that the listed elements are visible in an image.
686
+ * Checks that the listed elements are visible in an image. Image input only —
687
+ * template helpers do not accept video input; use `check()` for video.
556
688
  *
557
689
  * @param image Image source as a buffer, URL, file path, or base64 string.
558
690
  * @param elements Element descriptions that should be present and visible.
@@ -568,7 +700,8 @@ interface VisualAIClient {
568
700
  */
569
701
  elementsVisible(image: ImageInput, elements: string[], options?: ElementsVisibilityOptions): Promise<CheckResult>;
570
702
  /**
571
- * Checks that the listed elements are not visible in an image.
703
+ * Checks that the listed elements are not visible in an image. Image input
704
+ * only — template helpers do not accept video input; use `check()` for video.
572
705
  *
573
706
  * @param image Image source as a buffer, URL, file path, or base64 string.
574
707
  * @param elements Element descriptions that should be absent or hidden.
@@ -584,7 +717,8 @@ interface VisualAIClient {
584
717
  */
585
718
  elementsHidden(image: ImageInput, elements: string[], options?: ElementsVisibilityOptions): Promise<CheckResult>;
586
719
  /**
587
- * Runs the built-in accessibility template against an image.
720
+ * Runs the built-in accessibility template against an image. Image input
721
+ * only — template helpers do not accept video input.
588
722
  *
589
723
  * @param image Image source as a buffer, URL, file path, or base64 string.
590
724
  * @param options Optional checks and extra instructions for the accessibility prompt.
@@ -598,7 +732,8 @@ interface VisualAIClient {
598
732
  */
599
733
  accessibility(image: ImageInput, options?: AccessibilityOptions): Promise<CheckResult>;
600
734
  /**
601
- * Runs the built-in layout template against an image.
735
+ * Runs the built-in layout template against an image. Image input only —
736
+ * template helpers do not accept video input.
602
737
  *
603
738
  * @param image Image source as a buffer, URL, file path, or base64 string.
604
739
  * @param options Optional checks and extra instructions for the layout prompt.
@@ -612,7 +747,8 @@ interface VisualAIClient {
612
747
  */
613
748
  layout(image: ImageInput, options?: LayoutOptions): Promise<CheckResult>;
614
749
  /**
615
- * Runs the built-in page-load template against an image.
750
+ * Runs the built-in page-load template against an image. Image input only —
751
+ * template helpers do not accept video input.
616
752
  *
617
753
  * @param image Image source as a buffer, URL, file path, or base64 string.
618
754
  * @param options Optional page-load expectations and extra instructions.
@@ -626,7 +762,8 @@ interface VisualAIClient {
626
762
  */
627
763
  pageLoad(image: ImageInput, options?: PageLoadOptions): Promise<CheckResult>;
628
764
  /**
629
- * Runs the built-in content template against an image.
765
+ * Runs the built-in content template against an image. Image input only —
766
+ * template helpers do not accept video input.
630
767
  *
631
768
  * @param image Image source as a buffer, URL, file path, or base64 string.
632
769
  * @param options Optional content checks and extra instructions.
@@ -674,7 +811,7 @@ declare function visualAI(config?: VisualAIConfig): VisualAIClient;
674
811
  /**
675
812
  * Discrete error codes exposed by visual-ai-assertions for programmatic handling.
676
813
  */
677
- type VisualAIErrorCode = "VISUAL_AI_ERROR" | "AUTH_FAILED" | "RATE_LIMITED" | "PROVIDER_ERROR" | "IMAGE_INVALID" | "RESPONSE_PARSE_FAILED" | "RESPONSE_TRUNCATED" | "CONFIG_INVALID" | "ASSERTION_FAILED";
814
+ type VisualAIErrorCode = "VISUAL_AI_ERROR" | "AUTH_FAILED" | "RATE_LIMITED" | "PROVIDER_ERROR" | "IMAGE_INVALID" | "VIDEO_INVALID" | "RESPONSE_PARSE_FAILED" | "RESPONSE_TRUNCATED" | "CONFIG_INVALID" | "ASSERTION_FAILED";
678
815
  /**
679
816
  * Base class for all library errors.
680
817
  *
@@ -747,6 +884,20 @@ declare class VisualAIImageError extends VisualAIError<"IMAGE_INVALID"> {
747
884
  readonly code: "IMAGE_INVALID";
748
885
  constructor(message: string);
749
886
  }
887
+ /**
888
+ * Thrown when a video input cannot be loaded, decoded, or sampled — including
889
+ * when the optional ffmpeg peer dependencies are missing, the source is corrupt,
890
+ * or the duration exceeds the configured cap.
891
+ *
892
+ * @example
893
+ * ```ts
894
+ * throw new VisualAIVideoError("Video duration 14.2s exceeds limit of 10s");
895
+ * ```
896
+ */
897
+ declare class VisualAIVideoError extends VisualAIError<"VIDEO_INVALID"> {
898
+ readonly code: "VIDEO_INVALID";
899
+ constructor(message: string);
900
+ }
750
901
  /**
751
902
  * Thrown when a provider response cannot be parsed into the library result schema.
752
903
  *
@@ -809,7 +960,7 @@ declare class VisualAIAssertionError extends VisualAIError<"ASSERTION_FAILED"> {
809
960
  /**
810
961
  * Union of all concrete error subclasses exposed by the library.
811
962
  */
812
- type VisualAIKnownError = VisualAIAuthError | VisualAIRateLimitError | VisualAIProviderError | VisualAIImageError | VisualAIResponseParseError | VisualAITruncationError | VisualAIConfigError | VisualAIAssertionError;
963
+ type VisualAIKnownError = VisualAIAuthError | VisualAIRateLimitError | VisualAIProviderError | VisualAIImageError | VisualAIVideoError | VisualAIResponseParseError | VisualAITruncationError | VisualAIConfigError | VisualAIAssertionError;
813
964
  /**
814
965
  * Narrows an unknown thrown value to the concrete visual-ai-assertions error union.
815
966
  *
@@ -888,4 +1039,4 @@ declare function assertVisualResult(result: CheckResult, label?: string): void;
888
1039
  */
889
1040
  declare function assertVisualCompareResult(result: CompareResult, label?: string): void;
890
1041
 
891
- export { Accessibility, type AccessibilityCheckName, type AccessibilityOptions, type AskOptions, type AskResult, AskResultSchema, type ChangeEntry, ChangeEntrySchema, type CheckOptions, type CheckResult, CheckResultSchema, type CompareOptions, type CompareResult, CompareResultSchema, type Confidence, ConfidenceSchema, Content, type ContentCheckName, type ContentOptions, DEFAULT_MODELS, type DiffImageResult, type ElementsVisibilityOptions, type ImageInput, type Issue, type IssueCategory, IssueCategorySchema, type IssuePriority, IssuePrioritySchema, IssueSchema, type KnownModelName, Layout, type LayoutCheckName, type LayoutOptions, Model, type PageLoadOptions, Provider, type ProviderName, ReasoningEffort, type ReasoningEffortLevel, type StatementResult, StatementResultSchema, type SupportedMimeType, type UsageInfo, UsageInfoSchema, VisualAIAssertionError, VisualAIAuthError, type VisualAIClient, type VisualAIConfig, VisualAIConfigError, VisualAIError, type VisualAIErrorCode, VisualAIImageError, type VisualAIKnownError, VisualAIProviderError, VisualAIRateLimitError, VisualAIResponseParseError, VisualAITruncationError, assertVisualCompareResult, assertVisualResult, formatCheckResult, formatCompareResult, isVisualAIKnownError, visualAI };
1042
+ export { Accessibility, type AccessibilityCheckName, type AccessibilityOptions, type AskOptions, type AskResult, AskResultSchema, type ChangeEntry, ChangeEntrySchema, type CheckOptions, type CheckResult, CheckResultSchema, type CompareOptions, type CompareResult, CompareResultSchema, type Confidence, ConfidenceSchema, Content, type ContentCheckName, type ContentOptions, DEFAULT_MODELS, type DiffImageResult, type ElementsVisibilityOptions, type Frame, type ImageInput, type Issue, type IssueCategory, IssueCategorySchema, type IssuePriority, IssuePrioritySchema, IssueSchema, type KnownModelName, Layout, type LayoutCheckName, type LayoutOptions, type MediaInput, Model, type PageLoadOptions, Provider, type ProviderName, ReasoningEffort, type ReasoningEffortLevel, type StatementResult, StatementResultSchema, type SupportedMimeType, type SupportedVideoMimeType, type UsageInfo, UsageInfoSchema, type VideoFramesMetadata, type VideoSamplingOptions, VisualAIAssertionError, VisualAIAuthError, type VisualAIClient, type VisualAIConfig, VisualAIConfigError, VisualAIError, type VisualAIErrorCode, VisualAIImageError, type VisualAIKnownError, VisualAIProviderError, VisualAIRateLimitError, VisualAIResponseParseError, VisualAITruncationError, VisualAIVideoError, assertVisualCompareResult, assertVisualResult, formatCheckResult, formatCompareResult, isVisualAIKnownError, visualAI };