visual-ai-assertions 0.7.2 → 0.9.0

This diff shows the contents of publicly available package versions as released to a supported registry, and is provided for informational purposes only.
package/dist/index.d.cts CHANGED
@@ -18,11 +18,13 @@ declare const Provider: {
  /** Known model names grouped by provider. */
  declare const Model: {
  readonly Anthropic: {
+ readonly OPUS_4_7: "claude-opus-4-7";
  readonly OPUS_4_6: "claude-opus-4-6";
  readonly SONNET_4_6: "claude-sonnet-4-6";
  readonly HAIKU_4_5: "claude-haiku-4-5";
  };
  readonly OpenAI: {
+ readonly GPT_5_5: "gpt-5.5";
  readonly GPT_5_4: "gpt-5.4";
  readonly GPT_5_4_PRO: "gpt-5.4-pro";
  readonly GPT_5_4_MINI: "gpt-5.4-mini";
@@ -117,16 +119,24 @@ declare const StatementResultSchema: z.ZodObject<{
  pass: z.ZodBoolean;
  reasoning: z.ZodString;
  confidence: z.ZodOptional<z.ZodEnum<["high", "medium", "low"]>>;
+ /**
+ * For video inputs, the approximate timestamp (in seconds, from the start of the clip)
+ * of the frame that most clearly demonstrates the statement. `null` when the statement
+ * fails or applies across the whole clip. Always omitted for image inputs.
+ */
+ timestampSeconds: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
  }, "strip", z.ZodTypeAny, {
  statement: string;
  pass: boolean;
  reasoning: string;
  confidence?: "high" | "medium" | "low" | undefined;
+ timestampSeconds?: number | null | undefined;
  }, {
  statement: string;
  pass: boolean;
  reasoning: string;
  confidence?: "high" | "medium" | "low" | undefined;
+ timestampSeconds?: number | null | undefined;
  }>;
  /** Outcome of a single statement evaluated by `check()`. */
  type StatementResult = z.infer<typeof StatementResultSchema>;
@@ -153,7 +163,14 @@ declare const UsageInfoSchema: z.ZodObject<{
  }>;
  /** Token usage and optional cost/latency metadata for a provider call. */
  type UsageInfo = z.infer<typeof UsageInfoSchema>;
- /** Zod schema for results returned by `check()` and template helpers. */
+ /**
+ * Zod schema for results returned by `check()` and template helpers.
+ *
+ * Note: the runtime `CheckResult` TypeScript type extends this schema with
+ * an optional `frames` field that is populated client-side for video inputs.
+ * Parsing a stored `CheckResult` through this schema will silently drop
+ * `frames` because the schema only describes what the model returns.
+ */
  declare const CheckResultSchema: z.ZodObject<{
  pass: z.ZodBoolean;
  reasoning: z.ZodString;
@@ -199,16 +216,24 @@ declare const CheckResultSchema: z.ZodObject<{
  pass: z.ZodBoolean;
  reasoning: z.ZodString;
  confidence: z.ZodOptional<z.ZodEnum<["high", "medium", "low"]>>;
+ /**
+ * For video inputs, the approximate timestamp (in seconds, from the start of the clip)
+ * of the frame that most clearly demonstrates the statement. `null` when the statement
+ * fails or applies across the whole clip. Always omitted for image inputs.
+ */
+ timestampSeconds: z.ZodOptional<z.ZodNullable<z.ZodNumber>>;
  }, "strip", z.ZodTypeAny, {
  statement: string;
  pass: boolean;
  reasoning: string;
  confidence?: "high" | "medium" | "low" | undefined;
+ timestampSeconds?: number | null | undefined;
  }, {
  statement: string;
  pass: boolean;
  reasoning: string;
  confidence?: "high" | "medium" | "low" | undefined;
+ timestampSeconds?: number | null | undefined;
  }>, "many">;
  }, "strip", z.ZodTypeAny, {
  issues: {
@@ -224,6 +249,7 @@ declare const CheckResultSchema: z.ZodObject<{
  pass: boolean;
  reasoning: string;
  confidence?: "high" | "medium" | "low" | undefined;
+ timestampSeconds?: number | null | undefined;
  }[];
  usage?: {
  inputTokens: number;
@@ -246,6 +272,7 @@ declare const CheckResultSchema: z.ZodObject<{
  pass: boolean;
  reasoning: string;
  confidence?: "high" | "medium" | "low" | undefined;
+ timestampSeconds?: number | null | undefined;
  }[];
  usage?: {
  inputTokens: number;
@@ -255,8 +282,23 @@ declare const CheckResultSchema: z.ZodObject<{
  durationSeconds?: number | undefined;
  } | undefined;
  }>;
+ /**
+ * Metadata describing the sampled-frame timeline used when the input was a video.
+ * Populated client-side; not part of the model's response.
+ */
+ interface VideoFramesMetadata {
+ /** Total number of frames sampled from the video. */
+ count: number;
+ /** Timestamp (seconds, from the start of the clip) of each sampled frame, in order. */
+ timestampsSeconds: number[];
+ /** Total duration of the source video in seconds. */
+ durationSeconds: number;
+ }
  /** Result returned by `check()` and the template convenience methods. */
- type CheckResult = z.infer<typeof CheckResultSchema>;
+ type CheckResult = z.infer<typeof CheckResultSchema> & {
+ /** Present only when the input was a video. Describes which frames the model saw. */
+ frames?: VideoFramesMetadata;
+ };
  /** Zod schema for an individual visual change reported by `compare()`. */
  declare const ChangeEntrySchema: z.ZodObject<{
  description: z.ZodString;
@@ -338,7 +380,14 @@ declare const CompareResultSchema: z.ZodObject<{
  type CompareResult = z.infer<typeof CompareResultSchema> & {
  diffImage?: DiffImageResult;
  };
- /** Zod schema for results returned by `ask()`. */
+ /**
+ * Zod schema for results returned by `ask()`.
+ *
+ * Note: the runtime `AskResult` TypeScript type extends this schema with an
+ * optional `frames` field that is populated client-side for video inputs.
+ * Parsing a stored `AskResult` through this schema will silently drop
+ * `frames` because the schema only describes what the model returns.
+ */
  declare const AskResultSchema: z.ZodObject<{
  summary: z.ZodString;
  issues: z.ZodArray<z.ZodObject<{
@@ -357,6 +406,11 @@ declare const AskResultSchema: z.ZodObject<{
  description: string;
  suggestion: string;
  }>, "many">;
+ /**
+ * For video inputs, the indices of frames the model relied on to answer.
+ * Indices are 0-based and refer to entries in `frames.timestampsSeconds`.
+ */
+ frameReferences: z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>;
  usage: z.ZodOptional<z.ZodObject<{
  inputTokens: z.ZodNumber;
  outputTokens: z.ZodNumber;
@@ -392,6 +446,7 @@ declare const AskResultSchema: z.ZodObject<{
  estimatedCost?: number | undefined;
  durationSeconds?: number | undefined;
  } | undefined;
+ frameReferences?: number[] | undefined;
  }, {
  issues: {
  priority: "critical" | "major" | "minor";
@@ -407,13 +462,25 @@ declare const AskResultSchema: z.ZodObject<{
  estimatedCost?: number | undefined;
  durationSeconds?: number | undefined;
  } | undefined;
+ frameReferences?: number[] | undefined;
  }>;
  /** Result returned by `ask()`. */
- type AskResult = z.infer<typeof AskResultSchema>;
+ type AskResult = z.infer<typeof AskResultSchema> & {
+ /** Present only when the input was a video. Describes which frames the model saw. */
+ frames?: VideoFramesMetadata;
+ };
  /** Supported input shapes for image arguments accepted by the client. */
  type ImageInput = Buffer | Uint8Array | string;
+ /**
+ * Supported input shapes for media arguments accepted by the client.
+ * Identical to `ImageInput` today — the client auto-detects whether the bytes are
+ * an image or a video.
+ */
+ type MediaInput = ImageInput;
  /** Supported image MIME types accepted by all providers. */
  type SupportedMimeType = "image/jpeg" | "image/png" | "image/webp" | "image/gif";
+ /** Supported video MIME types the client can accept and sample frames from. */
+ type SupportedVideoMimeType = "video/mp4" | "video/webm" | "video/quicktime" | "video/x-matroska";
  /** Supported provider identifiers. */
  type ProviderName = "anthropic" | "openai" | "google";
  /**
@@ -443,10 +510,20 @@ interface VisualAIConfig {
  /** Optional instructions for `check()`. */
  interface CheckOptions {
  instructions?: readonly string[];
+ /**
+ * Frame-sampling configuration applied when the input is a video.
+ * Ignored for image inputs. See `VideoSamplingOptions` for defaults.
+ */
+ video?: VideoSamplingOptions;
  }
  /** Optional instructions for `ask()`. */
  interface AskOptions {
  instructions?: readonly string[];
+ /**
+ * Frame-sampling configuration applied when the input is a video.
+ * Ignored for image inputs. See `VideoSamplingOptions` for defaults.
+ */
+ video?: VideoSamplingOptions;
  }
  /** Metadata and binary content for an AI-generated diff image. */
  interface DiffImageResult {
@@ -485,6 +562,42 @@ interface ContentOptions {
  checks?: ContentCheckName[];
  instructions?: readonly string[];
  }
+ /** Internal normalized image representation passed to provider drivers. */
+ interface NormalizedImage {
+ readonly data: Buffer;
+ readonly mimeType: SupportedMimeType;
+ readonly base64: string;
+ }
+ /**
+ * Options for sampling frames from a video input. Defaults match the v1
+ * sampling strategy: 1 fps, capped at 10 frames, max duration 10 s.
+ */
+ interface VideoSamplingOptions {
+ /** Sampling rate in frames per second. Default `1`. */
+ fps?: number;
+ /**
+ * Maximum number of frames extracted regardless of duration. Default `10`.
+ * Hard-capped at `60` to keep memory bounded; values above the cap throw
+ * `VisualAIVideoError`.
+ */
+ maxFrames?: number;
+ /**
+ * Maximum video duration accepted, in seconds. Videos longer than this
+ * cause `VisualAIVideoError` to be thrown before any provider call.
+ * Default `10`.
+ */
+ maxDurationSeconds?: number;
+ }
+ /**
+ * A single frame extracted from a video input. Identical in shape to
+ * `NormalizedImage` so it can be passed transparently to provider drivers.
+ */
+ interface Frame extends NormalizedImage {
+ /** 0-based timestamp (seconds, from the start of the clip) of this frame. */
+ readonly timestampSeconds: number;
+ /** 0-based index of this frame within the sampled sequence. */
+ readonly index: number;
+ }

  /**
  * High-level client for running visual checks against screenshots or other images.
@@ -497,14 +610,22 @@ interface ContentOptions {
  */
  interface VisualAIClient {
  /**
- * Verifies one or more statements against a single image.
+ * Verifies one or more statements against a single image or video.
  *
- * @param image Image source as a buffer, URL, file path, or base64 string.
- * @param statements One or more statements to validate against the image.
- * @param options Optional additional instructions appended to the prompt.
+ * Pass an image (PNG/JPEG/WebP/GIF) for a single-frame check. Pass a video
+ * (MP4/WebM/MOV/MKV file path, URL, base64, Buffer) and the client samples
+ * frames automatically; statements pass if they are true at any sampled
+ * frame, and each statement result includes the timestamp where it
+ * matched. The `frames` metadata on the result reports which timestamps
+ * the model saw.
+ *
+ * @param input Image or video source as a buffer, URL, file path, or base64 string.
+ * @param statements One or more statements to validate against the input.
+ * @param options Optional additional instructions and video sampling overrides.
  * @returns A structured result describing pass/fail, issues, and statement reasoning.
  * @throws {VisualAIConfigError} When no statements are provided.
- * @throws {VisualAIImageError} When the image cannot be loaded or decoded.
+ * @throws {VisualAIImageError} When an image input cannot be loaded or decoded.
+ * @throws {VisualAIVideoError} When a video input cannot be loaded, exceeds the duration cap, or ffmpeg is missing.
  * @throws {VisualAIError} When the provider rejects the request or returns invalid output.
  * @example
  * ```ts
@@ -513,23 +634,35 @@ interface VisualAIClient {
  * "There is no error banner",
  * ]);
  * ```
+ * @example
+ * ```ts
+ * const result = await client.check("./recording.webm", [
+ * 'A success toast with text "Saved" briefly appears',
+ * ]);
+ * console.log(result.statements[0].timestampSeconds); // e.g. 3.5
+ * ```
  */
- check(image: ImageInput, statements: string | string[], options?: CheckOptions): Promise<CheckResult>;
+ check(input: MediaInput, statements: string | string[], options?: CheckOptions): Promise<CheckResult>;
  /**
- * Asks an open-ended question about an image and returns a structured summary.
+ * Asks an open-ended question about an image or video and returns a structured summary.
  *
- * @param image Image source as a buffer, URL, file path, or base64 string.
- * @param prompt Prompt describing what to inspect in the image.
- * @param options Optional additional instructions appended to the prompt.
+ * Video inputs are sampled into frames and analyzed as a chronological
+ * timeline. The result's `frameReferences` array surfaces which frames the
+ * model relied on for its answer.
+ *
+ * @param input Image or video source as a buffer, URL, file path, or base64 string.
+ * @param prompt Prompt describing what to inspect in the input.
+ * @param options Optional additional instructions and video sampling overrides.
  * @returns A summary with any detected issues.
- * @throws {VisualAIImageError} When the image cannot be loaded or decoded.
+ * @throws {VisualAIImageError} When an image input cannot be loaded or decoded.
+ * @throws {VisualAIVideoError} When a video input cannot be loaded, exceeds the duration cap, or ffmpeg is missing.
  * @throws {VisualAIError} When the provider rejects the request or returns invalid output.
  * @example
  * ```ts
  * const result = await client.ask(screenshot, "What looks visually broken on this page?");
  * ```
  */
- ask(image: ImageInput, prompt: string, options?: AskOptions): Promise<AskResult>;
+ ask(input: MediaInput, prompt: string, options?: AskOptions): Promise<AskResult>;
  /**
  * Compares two images and reports meaningful visual differences.
  *
@@ -550,7 +683,8 @@ interface VisualAIClient {
  */
  compare(imageA: ImageInput, imageB: ImageInput, options?: CompareOptions): Promise<CompareResult>;
  /**
- * Checks that the listed elements are visible in an image.
+ * Checks that the listed elements are visible in an image. Image input only —
+ * template helpers do not accept video input; use `check()` for video.
  *
  * @param image Image source as a buffer, URL, file path, or base64 string.
  * @param elements Element descriptions that should be present and visible.
@@ -566,7 +700,8 @@ interface VisualAIClient {
  */
  elementsVisible(image: ImageInput, elements: string[], options?: ElementsVisibilityOptions): Promise<CheckResult>;
  /**
- * Checks that the listed elements are not visible in an image.
+ * Checks that the listed elements are not visible in an image. Image input
+ * only — template helpers do not accept video input; use `check()` for video.
  *
  * @param image Image source as a buffer, URL, file path, or base64 string.
  * @param elements Element descriptions that should be absent or hidden.
@@ -582,7 +717,8 @@ interface VisualAIClient {
  */
  elementsHidden(image: ImageInput, elements: string[], options?: ElementsVisibilityOptions): Promise<CheckResult>;
  /**
- * Runs the built-in accessibility template against an image.
+ * Runs the built-in accessibility template against an image. Image input
+ * only — template helpers do not accept video input.
  *
  * @param image Image source as a buffer, URL, file path, or base64 string.
  * @param options Optional checks and extra instructions for the accessibility prompt.
@@ -596,7 +732,8 @@ interface VisualAIClient {
  */
  accessibility(image: ImageInput, options?: AccessibilityOptions): Promise<CheckResult>;
  /**
- * Runs the built-in layout template against an image.
+ * Runs the built-in layout template against an image. Image input only —
+ * template helpers do not accept video input.
  *
  * @param image Image source as a buffer, URL, file path, or base64 string.
  * @param options Optional checks and extra instructions for the layout prompt.
@@ -610,7 +747,8 @@ interface VisualAIClient {
  */
  layout(image: ImageInput, options?: LayoutOptions): Promise<CheckResult>;
  /**
- * Runs the built-in page-load template against an image.
+ * Runs the built-in page-load template against an image. Image input only —
+ * template helpers do not accept video input.
  *
  * @param image Image source as a buffer, URL, file path, or base64 string.
  * @param options Optional page-load expectations and extra instructions.
@@ -624,7 +762,8 @@ interface VisualAIClient {
  */
  pageLoad(image: ImageInput, options?: PageLoadOptions): Promise<CheckResult>;
  /**
- * Runs the built-in content template against an image.
+ * Runs the built-in content template against an image. Image input only —
+ * template helpers do not accept video input.
  *
  * @param image Image source as a buffer, URL, file path, or base64 string.
  * @param options Optional content checks and extra instructions.
@@ -672,7 +811,7 @@ declare function visualAI(config?: VisualAIConfig): VisualAIClient;
  /**
  * Discrete error codes exposed by visual-ai-assertions for programmatic handling.
  */
- type VisualAIErrorCode = "VISUAL_AI_ERROR" | "AUTH_FAILED" | "RATE_LIMITED" | "PROVIDER_ERROR" | "IMAGE_INVALID" | "RESPONSE_PARSE_FAILED" | "RESPONSE_TRUNCATED" | "CONFIG_INVALID" | "ASSERTION_FAILED";
+ type VisualAIErrorCode = "VISUAL_AI_ERROR" | "AUTH_FAILED" | "RATE_LIMITED" | "PROVIDER_ERROR" | "IMAGE_INVALID" | "VIDEO_INVALID" | "RESPONSE_PARSE_FAILED" | "RESPONSE_TRUNCATED" | "CONFIG_INVALID" | "ASSERTION_FAILED";
  /**
  * Base class for all library errors.
  *
@@ -745,6 +884,20 @@ declare class VisualAIImageError extends VisualAIError<"IMAGE_INVALID"> {
  readonly code: "IMAGE_INVALID";
  constructor(message: string);
  }
+ /**
+ * Thrown when a video input cannot be loaded, decoded, or sampled — including
+ * when the optional ffmpeg peer dependencies are missing, the source is corrupt,
+ * or the duration exceeds the configured cap.
+ *
+ * @example
+ * ```ts
+ * throw new VisualAIVideoError("Video duration 14.2s exceeds limit of 10s");
+ * ```
+ */
+ declare class VisualAIVideoError extends VisualAIError<"VIDEO_INVALID"> {
+ readonly code: "VIDEO_INVALID";
+ constructor(message: string);
+ }
  /**
  * Thrown when a provider response cannot be parsed into the library result schema.
  *
@@ -807,7 +960,7 @@ declare class VisualAIAssertionError extends VisualAIError<"ASSERTION_FAILED"> {
  /**
  * Union of all concrete error subclasses exposed by the library.
  */
- type VisualAIKnownError = VisualAIAuthError | VisualAIRateLimitError | VisualAIProviderError | VisualAIImageError | VisualAIResponseParseError | VisualAITruncationError | VisualAIConfigError | VisualAIAssertionError;
+ type VisualAIKnownError = VisualAIAuthError | VisualAIRateLimitError | VisualAIProviderError | VisualAIImageError | VisualAIVideoError | VisualAIResponseParseError | VisualAITruncationError | VisualAIConfigError | VisualAIAssertionError;
  /**
  * Narrows an unknown thrown value to the concrete visual-ai-assertions error union.
  *
@@ -886,4 +1039,4 @@ declare function assertVisualResult(result: CheckResult, label?: string): void;
  */
  declare function assertVisualCompareResult(result: CompareResult, label?: string): void;

- export { Accessibility, type AccessibilityCheckName, type AccessibilityOptions, type AskOptions, type AskResult, AskResultSchema, type ChangeEntry, ChangeEntrySchema, type CheckOptions, type CheckResult, CheckResultSchema, type CompareOptions, type CompareResult, CompareResultSchema, type Confidence, ConfidenceSchema, Content, type ContentCheckName, type ContentOptions, DEFAULT_MODELS, type DiffImageResult, type ElementsVisibilityOptions, type ImageInput, type Issue, type IssueCategory, IssueCategorySchema, type IssuePriority, IssuePrioritySchema, IssueSchema, type KnownModelName, Layout, type LayoutCheckName, type LayoutOptions, Model, type PageLoadOptions, Provider, type ProviderName, ReasoningEffort, type ReasoningEffortLevel, type StatementResult, StatementResultSchema, type SupportedMimeType, type UsageInfo, UsageInfoSchema, VisualAIAssertionError, VisualAIAuthError, type VisualAIClient, type VisualAIConfig, VisualAIConfigError, VisualAIError, type VisualAIErrorCode, VisualAIImageError, type VisualAIKnownError, VisualAIProviderError, VisualAIRateLimitError, VisualAIResponseParseError, VisualAITruncationError, assertVisualCompareResult, assertVisualResult, formatCheckResult, formatCompareResult, isVisualAIKnownError, visualAI };
+ export { Accessibility, type AccessibilityCheckName, type AccessibilityOptions, type AskOptions, type AskResult, AskResultSchema, type ChangeEntry, ChangeEntrySchema, type CheckOptions, type CheckResult, CheckResultSchema, type CompareOptions, type CompareResult, CompareResultSchema, type Confidence, ConfidenceSchema, Content, type ContentCheckName, type ContentOptions, DEFAULT_MODELS, type DiffImageResult, type ElementsVisibilityOptions, type Frame, type ImageInput, type Issue, type IssueCategory, IssueCategorySchema, type IssuePriority, IssuePrioritySchema, IssueSchema, type KnownModelName, Layout, type LayoutCheckName, type LayoutOptions, type MediaInput, Model, type PageLoadOptions, Provider, type ProviderName, ReasoningEffort, type ReasoningEffortLevel, type StatementResult, StatementResultSchema, type SupportedMimeType, type SupportedVideoMimeType, type UsageInfo, UsageInfoSchema, type VideoFramesMetadata, type VideoSamplingOptions, VisualAIAssertionError, VisualAIAuthError, type VisualAIClient, type VisualAIConfig, VisualAIConfigError, VisualAIError, type VisualAIErrorCode, VisualAIImageError, type VisualAIKnownError, VisualAIProviderError, VisualAIRateLimitError, VisualAIResponseParseError, VisualAITruncationError, VisualAIVideoError, assertVisualCompareResult, assertVisualResult, formatCheckResult, formatCompareResult, isVisualAIKnownError, visualAI };
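
Taken together, the new declarations suggest a usage flow for the video path along the following lines. This is an illustrative sketch, not an example from the package's own documentation: the `./recording.webm` path, the statement text, and the sampling overrides are hypothetical, and it assumes provider credentials are picked up from the environment (the `VisualAIConfig` fields are not shown in this diff).

```ts
import { visualAI, isVisualAIKnownError } from "visual-ai-assertions";

// `config` is optional per the visualAI() signature shown above; this
// sketch assumes credentials come from the environment.
const client = visualAI();

async function main(): Promise<void> {
  try {
    // Video inputs are sampled client-side. Per VideoSamplingOptions the
    // defaults are 1 fps, at most 10 frames, and a 10 s duration cap;
    // `fps: 2` here is an illustrative override.
    const result = await client.check(
      "./recording.webm",
      ['A success toast with text "Saved" briefly appears'],
      { video: { fps: 2, maxFrames: 10, maxDurationSeconds: 10 } },
    );

    // For video inputs each statement result carries the timestamp where it
    // matched (null when it failed or applies across the whole clip).
    for (const s of result.statements) {
      console.log(s.pass, s.timestampSeconds, s.reasoning);
    }

    // `frames` is attached client-side and present only for video inputs.
    console.log(result.frames?.count, result.frames?.timestampsSeconds);
  } catch (err) {
    if (isVisualAIKnownError(err) && err.code === "VIDEO_INVALID") {
      // VisualAIVideoError: missing ffmpeg, a corrupt source, or a clip
      // over the configured duration cap.
      console.error("Video rejected:", err.message);
    } else {
      throw err;
    }
  }
}

main();
```

The schema notes above also warn that parsing a stored result through `CheckResultSchema` silently drops the client-side `frames` field. A minimal sketch of a workaround, using a hypothetical `revalidate` helper:

```ts
import { CheckResultSchema, type CheckResult } from "visual-ai-assertions";

// Validate a stored result against the schema while preserving `frames`,
// which CheckResultSchema.parse() alone would drop because the schema only
// describes what the model returns.
function revalidate(stored: CheckResult): CheckResult {
  const parsed = CheckResultSchema.parse(stored);
  return { ...parsed, frames: stored.frames };
}
```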