@overshoot/sdk 0.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Overshoot
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,315 @@
+ # Overshoot SDK
+
+ > **⚠️ Alpha Release**: This is an alpha version (0.1.0-alpha.0). The API may change in future versions.
+
+ TypeScript SDK for real-time AI vision analysis on live video streams.
+
+ ## Installation
+
+ ```bash
+ npm install @overshoot/sdk@alpha
+ ```
+
+ Or install a specific alpha version:
+
+ ```bash
+ npm install @overshoot/sdk@0.1.0-alpha.0
+ ```
+
+ ## Quick Start
+
+ ### Camera Source
+
+ ```typescript
+ import { RealtimeVision } from "@overshoot/sdk";
+
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key-here",
+   prompt:
+     "Read any visible text and return JSON: {text: string | null, confidence: number}",
+   onResult: (result) => {
+     console.log(result.result);
+     console.log(`Latency: ${result.total_latency_ms}ms`);
+   },
+ });
+
+ await vision.start();
+ ```
+
+ ### Video File Source
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key-here",
+   prompt: "Detect all objects in the video and count them",
+   source: {
+     type: "video",
+     file: videoFile, // File object from <input type="file">
+   },
+   onResult: (result) => {
+     console.log(result.result);
+   },
+ });
+
+ await vision.start();
+ ```
+
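+ Where `videoFile` comes from is up to you; a minimal sketch of wiring it to a file input (the element id is illustrative):
+
+ ```typescript
+ // Hypothetical <input type="file" id="video-input" accept="video/*"> element
+ const input = document.querySelector<HTMLInputElement>("#video-input");
+ if (input) {
+   input.addEventListener("change", () => {
+     const videoFile = input.files?.[0];
+     if (videoFile) {
+       // Pass videoFile into the RealtimeVision config above
+       console.log(`Selected ${videoFile.name} (${videoFile.size} bytes)`);
+     }
+   });
+ }
+ ```
+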
+ ## Configuration
+
+ ### RealtimeVisionConfig
+
+ ```typescript
+ interface RealtimeVisionConfig {
+   // Required
+   apiUrl: string; // API endpoint
+   apiKey: string; // API key for authentication
+   prompt: string; // Task description for the model
+   onResult: (result: StreamInferenceResult) => void;
+
+   // Optional
+   source?: StreamSource; // Video source (default: environment-facing camera)
+   backend?: "overshoot" | "gemini"; // Model backend (default: "overshoot")
+   model?: string; // Model name (default: "Qwen/Qwen3-VL-30B-A3B-Instruct")
+   outputSchema?: Record<string, any>; // JSON schema for structured output
+   onError?: (error: Error) => void;
+   debug?: boolean; // Enable debug logging (default: false)
+
+   processing?: {
+     fps?: number; // Actual source frames per second (1-120)
+     sampling_ratio?: number; // Fraction of frames to process (0-1, default: 0.1)
+     clip_length_seconds?: number; // Size of each clip that the VLM infers on (0.1-60, default: 1.0)
+     delay_seconds?: number; // Shift between clips (0-60, default: 1.0)
+   };
+
+   iceServers?: RTCIceServer[]; // Custom WebRTC ICE servers
+ }
+ ```
+
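+ As a worked reading of those knobs (the numbers below are illustrative, not defaults): at `fps: 30` with `sampling_ratio: 0.1`, roughly 3 frames per second reach the model; `clip_length_seconds: 2` with `delay_seconds: 1` means each two-second clip starts one second after the previous one, so consecutive clips overlap by one second.
+
+ ```typescript
+ const processing = {
+   fps: 30, // source produces 30 frames per second
+   sampling_ratio: 0.1, // keep roughly 3 of those frames per second
+   clip_length_seconds: 2, // each inference sees a 2 s window
+   delay_seconds: 1, // windows start 1 s apart (1 s overlap)
+ };
+ ```
+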
+ ### StreamSource
+
+ ```typescript
+ type StreamSource =
+   | { type: "camera"; cameraFacing: "user" | "environment" }
+   | { type: "video"; file: File };
+ ```
+
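+ For example, a sketch of selecting the front-facing camera instead of the default environment-facing one:
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Describe facial expression",
+   source: { type: "camera", cameraFacing: "user" },
+   onResult: (result) => console.log(result.result),
+ });
+ ```
+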
+ ## API Methods
+
+ ```typescript
+ // Lifecycle
+ await vision.start(); // Start the video stream
+ await vision.stop(); // Stop and clean up resources
+
+ // Runtime control
+ await vision.updatePrompt(newPrompt); // Update task while running
+
+ // State access
+ vision.getMediaStream(); // Get MediaStream for video preview
+ vision.getStreamId(); // Get current stream ID
+ vision.isActive(); // Check if stream is running
+ ```
+
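+ The type declarations later in this diff also expose `vision.submitFeedback()`. A usage sketch, with illustrative rating and category values:
+
+ ```typescript
+ if (vision.isActive()) {
+   await vision.submitFeedback({
+     rating: 5,
+     category: "accuracy",
+     feedback: "Stable results on printed text",
+   });
+   await vision.stop();
+ }
+ ```
+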
+ ## Examples
+
+ ### Object Detection with Structured Output
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Detect objects and return JSON: {objects: string[], count: number}",
+   outputSchema: {
+     type: "object",
+     properties: {
+       objects: { type: "array", items: { type: "string" } },
+       count: { type: "integer" },
+     },
+     required: ["objects", "count"],
+   },
+   onResult: (result) => {
+     const data = JSON.parse(result.result);
+     console.log(`Found ${data.count} objects:`, data.objects);
+   },
+ });
+
+ await vision.start();
+ ```
+
+ ### Text Recognition (OCR)
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Read all visible text in the image",
+   onResult: (result) => {
+     console.log("Text:", result.result);
+   },
+ });
+
+ await vision.start();
+ ```
+
+ ### Video Preview Display
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Describe what you see",
+   onResult: (result) => console.log(result.result),
+ });
+
+ await vision.start();
+
+ // Attach to a video element for preview
+ const videoElement = document.querySelector("video");
+ const stream = vision.getMediaStream();
+ if (videoElement && stream) {
+   videoElement.srcObject = stream;
+ }
+ ```
+
+ ### Dynamic Prompt Updates
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Count people",
+   onResult: (result) => console.log(result.result),
+ });
+
+ await vision.start();
+
+ // Change task without restarting the stream
+ await vision.updatePrompt("Detect vehicles instead");
+ ```
+
+ ### Debug Mode
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Detect objects",
+   debug: true, // Enable detailed logging
+   onResult: (result) => console.log(result.result),
+ });
+
+ await vision.start();
+ // Console will show detailed connection and processing logs
+ ```
+
+ ## Error Handling
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Detect objects",
+   onResult: (result) => {
+     if (result.ok) {
+       console.log("Success:", result.result);
+     } else {
+       console.error("Inference error:", result.error);
+     }
+   },
+   onError: (error) => {
+     if (error.name === "UnauthorizedError") {
+       console.error("Invalid API key");
+     } else if (error.name === "NetworkError") {
+       console.error("Network error:", error.message);
+     } else {
+       console.error("Error:", error);
+     }
+   },
+ });
+
+ try {
+   await vision.start();
+ } catch (error) {
+   console.error("Failed to start:", error);
+ }
+ ```
+
+ ## Result Format
+
+ The `onResult` callback receives a `StreamInferenceResult` object:
+
+ ```typescript
+ interface StreamInferenceResult {
+   id: string; // Result ID
+   stream_id: string; // Stream ID
+   model_backend: "gemini" | "overshoot";
+   model_name: string; // Model used
+   prompt: string; // Task that was run
+   result: string; // Model output (text or JSON string)
+   inference_latency_ms: number; // Model inference time
+   total_latency_ms: number; // End-to-end latency
+   ok: boolean; // Success status
+   error: string | null; // Error message if failed
+ }
+ ```
+
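+ When a prompt or `outputSchema` asks for JSON, `result` arrives as a JSON string, so it is worth guarding the parse. A defensive sketch of the config's `onResult` handler (a fragment, not a full config):
+
+ ```typescript
+ onResult: (result) => {
+   if (!result.ok) {
+     console.error("Inference error:", result.error);
+     return;
+   }
+   try {
+     const data = JSON.parse(result.result);
+     console.log("Parsed:", data);
+   } catch {
+     // Model output was not valid JSON; fall back to the raw text
+     console.log("Raw:", result.result);
+   }
+ },
+ ```
+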
+ ## Use Cases
+
+ - Real-time text extraction and OCR
+ - Safety monitoring (PPE detection, hazard identification)
+ - Accessibility tools (scene description)
+ - Gesture recognition and control
+ - Document scanning and alignment detection
+ - Sports and fitness form analysis
+ - Video file content analysis
+
+ ## Error Types
+
+ The SDK provides specific error classes for different failure modes (a usage sketch follows the list):
+
+ - `ValidationError` - Invalid configuration or parameters
+ - `UnauthorizedError` - Invalid or revoked API key
+ - `NotFoundError` - Stream or resource not found
+ - `NetworkError` - Network connectivity issues
+ - `ServerError` - Server-side errors
+ - `ApiError` - General API errors
+
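+ Apart from `ApiError` itself, each of these extends `ApiError` (see the declarations later in this diff), and all are exported, so `instanceof` checks are one way to branch. A sketch (the import path assumes the scoped package name from the diff header):
+
+ ```typescript
+ import { ApiError, NetworkError, UnauthorizedError } from "@overshoot/sdk";
+
+ function describeError(error: Error): string {
+   if (error instanceof UnauthorizedError) return "Check your API key";
+   if (error instanceof NetworkError) return `Connectivity issue: ${error.message}`;
+   if (error instanceof ApiError) return `API error (status ${error.statusCode ?? "unknown"})`;
+   return error.message;
+ }
+ ```
+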
+ ## Development
+
+ ```bash
+ # Install dependencies
+ npm install
+
+ # Build
+ npm run build
+
+ # Test
+ npm test
+ npm run test:watch
+
+ # Type check
+ npm run type-check
+
+ # Lint
+ npm run lint
+ ```
+
+ ## Browser Compatibility
+
+ Requires browsers with support for the following (a quick capability check is sketched below):
+
+ - WebRTC (RTCPeerConnection)
+ - MediaStream API
+ - WebSocket
+ - Modern JavaScript (ES2020+)
+
+ Supported browsers: Chrome 80+, Firefox 75+, Safari 14+, Edge 80+
+
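+ A rough runtime capability check (a sketch; not exhaustive, and camera sources additionally need `getUserMedia`):
+
+ ```typescript
+ const supported =
+   typeof RTCPeerConnection !== "undefined" &&
+   typeof MediaStream !== "undefined" &&
+   typeof WebSocket !== "undefined" &&
+   !!navigator.mediaDevices?.getUserMedia;
+
+ if (!supported) {
+   console.warn("This browser cannot run the Overshoot SDK");
+ }
+ ```
+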
+ ## Feedback
+
+ As this is an alpha release, we welcome your feedback! Please report issues or suggestions through GitHub issues.
+
+ ## License
+
+ MIT
@@ -0,0 +1,302 @@
+ type StreamSource = {
+   type: "camera";
+   cameraFacing: "user" | "environment";
+ } | {
+   type: "video";
+   file: File;
+ };
+ type WebRtcOffer = {
+   type: "offer";
+   sdp: string;
+ };
+ type WebRtcAnswer = {
+   type: "answer";
+   sdp: string;
+ };
+ type StreamProcessingConfig = {
+   sampling_ratio: number;
+   fps: number;
+   clip_length_seconds?: number;
+   delay_seconds?: number;
+ };
+ type StreamInferenceConfig = {
+   prompt: string;
+   backend: "gemini" | "overshoot";
+   model: string;
+   output_schema_json?: Record<string, any>;
+ };
+ type StreamClientMeta = {
+   request_id?: string;
+ };
+ type StreamCreateRequest = {
+   webrtc: WebRtcOffer;
+   processing: StreamProcessingConfig;
+   inference: StreamInferenceConfig;
+   client?: StreamClientMeta;
+ };
+ type StreamCreateResponse = {
+   stream_id: string;
+   webrtc: WebRtcAnswer;
+   lease?: {
+     ttl_seconds: number;
+   };
+   turn_servers?: RTCIceServer[];
+ };
+ type StreamInferenceResult = {
+   id: string;
+   stream_id: string;
+   model_backend: "gemini" | "overshoot";
+   model_name: string;
+   prompt: string;
+   result: string;
+   inference_latency_ms: number;
+   total_latency_ms: number;
+   ok: boolean;
+   error: string | null;
+ };
+ type StreamConfigResponse = {
+   id: string;
+   stream_id: string;
+   prompt: string;
+   backend: "gemini" | "overshoot";
+   model: string;
+   output_schema_json?: Record<string, any>;
+   created_at?: string;
+   updated_at?: string;
+ };
+ type FeedbackCreateRequest = {
+   rating: number;
+   category: string;
+   feedback?: string;
+ };
+ type FeedbackResponse = {
+   id: string;
+   stream_id: string;
+   rating: number;
+   category: string;
+   feedback: string;
+   created_at?: string;
+   updated_at?: string;
+ };
+ type KeepaliveResponse = {
+   status: "ok";
+   stream_id: string;
+   ttl_seconds: number;
+ };
+ type StatusResponse = {
+   status: "ok";
+ };
+ type ErrorResponse = {
+   error: string;
+   message?: string;
+   request_id?: string;
+   details?: any;
+ };
+
+ type ClientConfig = {
+   baseUrl: string;
+   apiKey: string;
+ };
+ declare class StreamClient {
+   private baseUrl;
+   private apiKey;
+   constructor(config: ClientConfig);
+   private request;
+   createStream(request: StreamCreateRequest): Promise<StreamCreateResponse>;
+   renewLease(streamId: string): Promise<KeepaliveResponse>;
+   updatePrompt(streamId: string, prompt: string): Promise<StreamConfigResponse>;
+   submitFeedback(streamId: string, feedback: FeedbackCreateRequest): Promise<StatusResponse>;
+   getAllFeedback(): Promise<FeedbackResponse[]>;
+   connectWebSocket(streamId: string): WebSocket;
+   /**
+    * Health check endpoint (for testing, uses internal port if available)
+    * Note: This endpoint may not be available via the main API
+    */
+   healthCheck(): Promise<string>;
+ }
+
+ interface RealtimeVisionConfig {
+   /**
+    * Base URL for the API (e.g., "https://api.example.com")
+    */
+   apiUrl: string;
+   /**
+    * API key for authentication
+    * Required for all API requests
+    */
+   apiKey: string;
+   /**
+    * The prompt/task to run on window segments of the stream.
+    * This runs continuously (at a defined window interval).
+    *
+    * Examples:
+    * - "Read any visible text"
+    * - "Detect objects and return as JSON array"
+    * - "Describe facial expression"
+    */
+   prompt: string;
+   /**
+    * Video source configuration
+    * Defaults to camera with environment facing if not specified
+    */
+   source?: StreamSource;
+   /**
+    * Model backend to use
+    */
+   backend?: "gemini" | "overshoot";
+   /**
+    * Model name to use for inference
+    */
+   model?: string;
+   /**
+    * Optional JSON schema for structured output
+    */
+   outputSchema?: Record<string, any>;
+   /**
+    * Called when a new inference result arrives (~1 per second)
+    */
+   onResult: (result: StreamInferenceResult) => void;
+   /**
+    * Called when an error occurs
+    */
+   onError?: (error: Error) => void;
+   /**
+    * Custom processing configuration
+    * All fields are optional and will use defaults if not provided
+    */
+   processing?: {
+     /**
+      * Sampling ratio (0-1). Controls what fraction of frames are processed.
+      */
+     sampling_ratio?: number;
+     /**
+      * Frames per second (1-120)
+      */
+     fps?: number;
+     /**
+      * Clip length in seconds (0.1-60)
+      */
+     clip_length_seconds?: number;
+     /**
+      * Delay in seconds (0-60)
+      */
+     delay_seconds?: number;
+   };
+   /**
+    * ICE servers for WebRTC connection
+    * If not provided, uses default TURN servers
+    */
+   iceServers?: RTCIceServer[];
+   /**
+    * Enable debug logging
+    * @default false
+    */
+   debug?: boolean;
+ }
+ declare class RealtimeVision {
+   private config;
+   private client;
+   private logger;
+   private mediaStream;
+   private peerConnection;
+   private webSocket;
+   private streamId;
+   private keepaliveInterval;
+   private videoElement;
+   private isRunning;
+   constructor(config: RealtimeVisionConfig);
+   /**
+    * Validate configuration values
+    */
+   private validateConfig;
+   /**
+    * Create media stream from the configured source
+    */
+   private createMediaStream;
+   /**
+    * Get FPS from media stream
+    */
+   private getStreamFps;
+   /**
+    * Get processing configuration with defaults applied
+    */
+   private getProcessingConfig;
+   /**
+    * Get the effective source configuration
+    */
+   private getSource;
+   /**
+    * Start the vision stream
+    */
+   start(): Promise<void>;
+   /**
+    * Set up keepalive interval with error handling
+    */
+   private setupKeepalive;
+   /**
+    * Set up WebSocket connection with error handling
+    */
+   private setupWebSocket;
+   /**
+    * Handle non-fatal errors (report but don't stop stream)
+    */
+   private handleNonFatalError;
+   /**
+    * Handle fatal errors (stop stream and report)
+    */
+   private handleFatalError;
+   /**
+    * Update the prompt/task while stream is running
+    */
+   updatePrompt(prompt: string): Promise<void>;
+   /**
+    * Stop the vision stream and clean up resources
+    */
+   stop(): Promise<void>;
+   /**
+    * Submit feedback for the stream
+    */
+   submitFeedback(feedback: {
+     rating: number;
+     category: string;
+     feedback?: string;
+   }): Promise<void>;
+   /**
+    * Get the current stream ID
+    */
+   getStreamId(): string | null;
+   /**
+    * Get the media stream (for displaying video preview)
+    */
+   getMediaStream(): MediaStream | null;
+   /**
+    * Check if the stream is running
+    */
+   isActive(): boolean;
+   private cleanup;
+ }
+
+ declare class ApiError extends Error {
+   readonly statusCode?: number;
+   readonly requestId?: string;
+   readonly details?: any;
+   constructor(message: string, statusCode?: number, requestId?: string, details?: any);
+ }
+ declare class UnauthorizedError extends ApiError {
+   constructor(message: string, requestId?: string);
+ }
+ declare class ValidationError extends ApiError {
+   constructor(message: string, requestId?: string, details?: any);
+ }
+ declare class NotFoundError extends ApiError {
+   constructor(message: string, requestId?: string);
+ }
+ declare class NetworkError extends ApiError {
+   readonly cause?: Error;
+   constructor(message: string, cause?: Error);
+ }
+ declare class ServerError extends ApiError {
+   constructor(message: string, requestId?: string, details?: any);
+ }
+
+ export { ApiError, type ErrorResponse, type FeedbackCreateRequest, type FeedbackResponse, type KeepaliveResponse, NetworkError, NotFoundError, RealtimeVision, type RealtimeVisionConfig, ServerError, type StatusResponse, StreamClient, type StreamClientMeta, type StreamConfigResponse, type StreamCreateRequest, type StreamCreateResponse, type StreamInferenceConfig, type StreamInferenceResult, type StreamProcessingConfig, type StreamSource, UnauthorizedError, ValidationError, type WebRtcAnswer, type WebRtcOffer };