@overshoot/sdk 0.1.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Overshoot
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,315 @@
+ # Overshoot SDK
+
+ > **⚠️ Alpha Release**: This is an alpha version (0.1.0-alpha.0). The API may change in future versions.
+
+ TypeScript SDK for real-time AI vision analysis on live video streams.
+
+ ## Installation
+
+ ```bash
+ npm install @overshoot/sdk@alpha
+ ```
+
+ Or install a specific alpha version:
+
+ ```bash
+ npm install @overshoot/sdk@0.1.0-alpha.0
+ ```
+
+ ## Quick Start
+
+ ### Camera Source
+
+ ```typescript
+ import { RealtimeVision } from "@overshoot/sdk";
+
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key-here",
+   prompt:
+     "Read any visible text and return JSON: {text: string | null, confidence: number}",
+   onResult: (result) => {
+     console.log(result.result);
+     console.log(`Latency: ${result.total_latency_ms}ms`);
+   },
+ });
+
+ await vision.start();
+ ```
+
+ ### Video File Source
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key-here",
+   prompt: "Detect all objects in the video and count them",
+   source: {
+     type: "video",
+     file: videoFile, // File object from <input type="file">
+   },
+   onResult: (result) => {
+     console.log(result.result);
+   },
+ });
+
+ await vision.start();
+ ```
+
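+ Where `videoFile` comes from is up to you; a minimal sketch of wiring it to a file input (the element id is illustrative):
+
+ ```typescript
+ // Hypothetical <input type="file" id="video-input" accept="video/*"> element
+ const input = document.querySelector<HTMLInputElement>("#video-input");
+ if (input) {
+   input.addEventListener("change", () => {
+     const videoFile = input.files?.[0];
+     if (videoFile) {
+       // Pass videoFile into the RealtimeVision config above
+       console.log(`Selected ${videoFile.name} (${videoFile.size} bytes)`);
+     }
+   });
+ }
+ ```
+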
+ ## Configuration
+
+ ### RealtimeVisionConfig
+
+ ```typescript
+ interface RealtimeVisionConfig {
+   // Required
+   apiUrl: string; // API endpoint
+   apiKey: string; // API key for authentication
+   prompt: string; // Task description for the model
+   onResult: (result: StreamInferenceResult) => void;
+
+   // Optional
+   source?: StreamSource; // Video source (default: environment-facing camera)
+   backend?: "overshoot" | "gemini"; // Model backend (default: "overshoot")
+   model?: string; // Model name (default: "Qwen/Qwen3-VL-30B-A3B-Instruct")
+   outputSchema?: Record<string, any>; // JSON schema for structured output
+   onError?: (error: Error) => void;
+   debug?: boolean; // Enable debug logging (default: false)
+
+   processing?: {
+     fps?: number; // Actual source frames per second (1-120)
+     sampling_ratio?: number; // Fraction of frames to process (0-1, default: 0.1)
+     clip_length_seconds?: number; // Size of each clip that the VLM infers on (0.1-60, default: 1.0)
+     delay_seconds?: number; // Shift between clips (0-60, default: 1.0)
+   };
+
+   iceServers?: RTCIceServer[]; // Custom WebRTC ICE servers
+ }
+ ```
+
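+ As a worked reading of those knobs (the numbers below are illustrative, not defaults): at `fps: 30` with `sampling_ratio: 0.1`, roughly 3 frames per second reach the model; `clip_length_seconds: 2` with `delay_seconds: 1` means each two-second clip starts one second after the previous one, so consecutive clips overlap by one second.
+
+ ```typescript
+ const processing = {
+   fps: 30, // source produces 30 frames per second
+   sampling_ratio: 0.1, // keep roughly 3 of those frames per second
+   clip_length_seconds: 2, // each inference sees a 2 s window
+   delay_seconds: 1, // windows start 1 s apart (1 s overlap)
+ };
+ ```
+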
+ ### StreamSource
+
+ ```typescript
+ type StreamSource =
+   | { type: "camera"; cameraFacing: "user" | "environment" }
+   | { type: "video"; file: File };
+ ```
+
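+ For example, a sketch of selecting the front-facing camera instead of the default environment-facing one:
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Describe facial expression",
+   source: { type: "camera", cameraFacing: "user" },
+   onResult: (result) => console.log(result.result),
+ });
+ ```
+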
+ ## API Methods
+
+ ```typescript
+ // Lifecycle
+ await vision.start(); // Start the video stream
+ await vision.stop(); // Stop and clean up resources
+
+ // Runtime control
+ await vision.updatePrompt(newPrompt); // Update task while running
+
+ // State access
+ vision.getMediaStream(); // Get MediaStream for video preview
+ vision.getStreamId(); // Get current stream ID
+ vision.isActive(); // Check if stream is running
+ ```
+
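+ The type declarations later in this diff also expose `vision.submitFeedback()`. A usage sketch, with illustrative rating and category values:
+
+ ```typescript
+ if (vision.isActive()) {
+   await vision.submitFeedback({
+     rating: 5,
+     category: "accuracy",
+     feedback: "Stable results on printed text",
+   });
+   await vision.stop();
+ }
+ ```
+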
+ ## Examples
+
+ ### Object Detection with Structured Output
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Detect objects and return JSON: {objects: string[], count: number}",
+   outputSchema: {
+     type: "object",
+     properties: {
+       objects: { type: "array", items: { type: "string" } },
+       count: { type: "integer" },
+     },
+     required: ["objects", "count"],
+   },
+   onResult: (result) => {
+     const data = JSON.parse(result.result);
+     console.log(`Found ${data.count} objects:`, data.objects);
+   },
+ });
+
+ await vision.start();
+ ```
+
+ ### Text Recognition (OCR)
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Read all visible text in the image",
+   onResult: (result) => {
+     console.log("Text:", result.result);
+   },
+ });
+
+ await vision.start();
+ ```
+
+ ### Video Preview Display
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Describe what you see",
+   onResult: (result) => console.log(result.result),
+ });
+
+ await vision.start();
+
+ // Attach to a video element for preview
+ const videoElement = document.querySelector("video");
+ const stream = vision.getMediaStream();
+ if (videoElement && stream) {
+   videoElement.srcObject = stream;
+ }
+ ```
+
+ ### Dynamic Prompt Updates
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Count people",
+   onResult: (result) => console.log(result.result),
+ });
+
+ await vision.start();
+
+ // Change task without restarting the stream
+ await vision.updatePrompt("Detect vehicles instead");
+ ```
+
+ ### Debug Mode
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Detect objects",
+   debug: true, // Enable detailed logging
+   onResult: (result) => console.log(result.result),
+ });
+
+ await vision.start();
+ // Console will show detailed connection and processing logs
+ ```
+
+ ## Error Handling
+
+ ```typescript
+ const vision = new RealtimeVision({
+   apiUrl: "https://api.overshoot.ai",
+   apiKey: "your-api-key",
+   prompt: "Detect objects",
+   onResult: (result) => {
+     if (result.ok) {
+       console.log("Success:", result.result);
+     } else {
+       console.error("Inference error:", result.error);
+     }
+   },
+   onError: (error) => {
+     if (error.name === "UnauthorizedError") {
+       console.error("Invalid API key");
+     } else if (error.name === "NetworkError") {
+       console.error("Network error:", error.message);
+     } else {
+       console.error("Error:", error);
+     }
+   },
+ });
+
+ try {
+   await vision.start();
+ } catch (error) {
+   console.error("Failed to start:", error);
+ }
+ ```
+
+ ## Result Format
+
+ The `onResult` callback receives a `StreamInferenceResult` object:
+
+ ```typescript
+ interface StreamInferenceResult {
+   id: string; // Result ID
+   stream_id: string; // Stream ID
+   model_backend: "gemini" | "overshoot";
+   model_name: string; // Model used
+   prompt: string; // Task that was run
+   result: string; // Model output (text or JSON string)
+   inference_latency_ms: number; // Model inference time
+   total_latency_ms: number; // End-to-end latency
+   ok: boolean; // Success status
+   error: string | null; // Error message if failed
+ }
+ ```
+
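+ When a prompt or `outputSchema` asks for JSON, `result` arrives as a JSON string, so it is worth guarding the parse. A defensive sketch of the config's `onResult` handler (a fragment, not a full config):
+
+ ```typescript
+ onResult: (result) => {
+   if (!result.ok) {
+     console.error("Inference error:", result.error);
+     return;
+   }
+   try {
+     const data = JSON.parse(result.result);
+     console.log("Parsed:", data);
+   } catch {
+     // Model output was not valid JSON; fall back to the raw text
+     console.log("Raw:", result.result);
+   }
+ },
+ ```
+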
+ ## Use Cases
+
+ - Real-time text extraction and OCR
+ - Safety monitoring (PPE detection, hazard identification)
+ - Accessibility tools (scene description)
+ - Gesture recognition and control
+ - Document scanning and alignment detection
+ - Sports and fitness form analysis
+ - Video file content analysis
+
+ ## Error Types
+
+ The SDK provides specific error classes for different failure modes (a usage sketch follows the list):
+
+ - `ValidationError` - Invalid configuration or parameters
+ - `UnauthorizedError` - Invalid or revoked API key
+ - `NotFoundError` - Stream or resource not found
+ - `NetworkError` - Network connectivity issues
+ - `ServerError` - Server-side errors
+ - `ApiError` - General API errors
+
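+ Apart from `ApiError` itself, each of these extends `ApiError` (see the declarations later in this diff), and all are exported, so `instanceof` checks are one way to branch. A sketch (the import path assumes the scoped package name from the diff header):
+
+ ```typescript
+ import { ApiError, NetworkError, UnauthorizedError } from "@overshoot/sdk";
+
+ function describeError(error: Error): string {
+   if (error instanceof UnauthorizedError) return "Check your API key";
+   if (error instanceof NetworkError) return `Connectivity issue: ${error.message}`;
+   if (error instanceof ApiError) return `API error (status ${error.statusCode ?? "unknown"})`;
+   return error.message;
+ }
+ ```
+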
+ ## Development
+
+ ```bash
+ # Install dependencies
+ npm install
+
+ # Build
+ npm run build
+
+ # Test
+ npm test
+ npm run test:watch
+
+ # Type check
+ npm run type-check
+
+ # Lint
+ npm run lint
+ ```
+
+ ## Browser Compatibility
+
+ Requires browsers with support for the following (a quick capability check is sketched below):
+
+ - WebRTC (RTCPeerConnection)
+ - MediaStream API
+ - WebSocket
+ - Modern JavaScript (ES2020+)
+
+ Supported browsers: Chrome 80+, Firefox 75+, Safari 14+, Edge 80+
+
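+ A rough runtime capability check (a sketch; not exhaustive, and camera sources additionally need `getUserMedia`):
+
+ ```typescript
+ const supported =
+   typeof RTCPeerConnection !== "undefined" &&
+   typeof MediaStream !== "undefined" &&
+   typeof WebSocket !== "undefined" &&
+   !!navigator.mediaDevices?.getUserMedia;
+
+ if (!supported) {
+   console.warn("This browser cannot run the Overshoot SDK");
+ }
+ ```
+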
+ ## Feedback
+
+ As this is an alpha release, we welcome your feedback! Please report issues or suggestions through GitHub issues.
+
+ ## License
+
+ MIT
@@ -0,0 +1,302 @@
+ type StreamSource = {
+   type: "camera";
+   cameraFacing: "user" | "environment";
+ } | {
+   type: "video";
+   file: File;
+ };
+ type WebRtcOffer = {
+   type: "offer";
+   sdp: string;
+ };
+ type WebRtcAnswer = {
+   type: "answer";
+   sdp: string;
+ };
+ type StreamProcessingConfig = {
+   sampling_ratio: number;
+   fps: number;
+   clip_length_seconds?: number;
+   delay_seconds?: number;
+ };
+ type StreamInferenceConfig = {
+   prompt: string;
+   backend: "gemini" | "overshoot";
+   model: string;
+   output_schema_json?: Record<string, any>;
+ };
+ type StreamClientMeta = {
+   request_id?: string;
+ };
+ type StreamCreateRequest = {
+   webrtc: WebRtcOffer;
+   processing: StreamProcessingConfig;
+   inference: StreamInferenceConfig;
+   client?: StreamClientMeta;
+ };
+ type StreamCreateResponse = {
+   stream_id: string;
+   webrtc: WebRtcAnswer;
+   lease?: {
+     ttl_seconds: number;
+   };
+   turn_servers?: RTCIceServer[];
+ };
+ type StreamInferenceResult = {
+   id: string;
+   stream_id: string;
+   model_backend: "gemini" | "overshoot";
+   model_name: string;
+   prompt: string;
+   result: string;
+   inference_latency_ms: number;
+   total_latency_ms: number;
+   ok: boolean;
+   error: string | null;
+ };
+ type StreamConfigResponse = {
+   id: string;
+   stream_id: string;
+   prompt: string;
+   backend: "gemini" | "overshoot";
+   model: string;
+   output_schema_json?: Record<string, any>;
+   created_at?: string;
+   updated_at?: string;
+ };
+ type FeedbackCreateRequest = {
+   rating: number;
+   category: string;
+   feedback?: string;
+ };
+ type FeedbackResponse = {
+   id: string;
+   stream_id: string;
+   rating: number;
+   category: string;
+   feedback: string;
+   created_at?: string;
+   updated_at?: string;
+ };
+ type KeepaliveResponse = {
+   status: "ok";
+   stream_id: string;
+   ttl_seconds: number;
+ };
+ type StatusResponse = {
+   status: "ok";
+ };
+ type ErrorResponse = {
+   error: string;
+   message?: string;
+   request_id?: string;
+   details?: any;
+ };
+
+ type ClientConfig = {
+   baseUrl: string;
+   apiKey: string;
+ };
+ declare class StreamClient {
+   private baseUrl;
+   private apiKey;
+   constructor(config: ClientConfig);
+   private request;
+   createStream(request: StreamCreateRequest): Promise<StreamCreateResponse>;
+   renewLease(streamId: string): Promise<KeepaliveResponse>;
+   updatePrompt(streamId: string, prompt: string): Promise<StreamConfigResponse>;
+   submitFeedback(streamId: string, feedback: FeedbackCreateRequest): Promise<StatusResponse>;
+   getAllFeedback(): Promise<FeedbackResponse[]>;
+   connectWebSocket(streamId: string): WebSocket;
+   /**
+    * Health check endpoint (for testing, uses internal port if available)
+    * Note: This endpoint may not be available via the main API
+    */
+   healthCheck(): Promise<string>;
+ }
+
+ interface RealtimeVisionConfig {
+   /**
+    * Base URL for the API (e.g., "https://api.example.com")
+    */
+   apiUrl: string;
+   /**
+    * API key for authentication
+    * Required for all API requests
+    */
+   apiKey: string;
+   /**
+    * The prompt/task to run on window segments of the stream.
+    * This runs continuously (at a defined window interval).
+    *
+    * Examples:
+    * - "Read any visible text"
+    * - "Detect objects and return as JSON array"
+    * - "Describe facial expression"
+    */
+   prompt: string;
+   /**
+    * Video source configuration
+    * Defaults to camera with environment facing if not specified
+    */
+   source?: StreamSource;
+   /**
+    * Model backend to use
+    */
+   backend?: "gemini" | "overshoot";
+   /**
+    * Model name to use for inference
+    */
+   model?: string;
+   /**
+    * Optional JSON schema for structured output
+    */
+   outputSchema?: Record<string, any>;
+   /**
+    * Called when a new inference result arrives (~1 per second)
+    */
+   onResult: (result: StreamInferenceResult) => void;
+   /**
+    * Called when an error occurs
+    */
+   onError?: (error: Error) => void;
+   /**
+    * Custom processing configuration
+    * All fields are optional and will use defaults if not provided
+    */
+   processing?: {
+     /**
+      * Sampling ratio (0-1). Controls what fraction of frames are processed.
+      */
+     sampling_ratio?: number;
+     /**
+      * Frames per second (1-120)
+      */
+     fps?: number;
+     /**
+      * Clip length in seconds (0.1-60)
+      */
+     clip_length_seconds?: number;
+     /**
+      * Delay in seconds (0-60)
+      */
+     delay_seconds?: number;
+   };
+   /**
+    * ICE servers for WebRTC connection
+    * If not provided, uses default TURN servers
+    */
+   iceServers?: RTCIceServer[];
+   /**
+    * Enable debug logging
+    * @default false
+    */
+   debug?: boolean;
+ }
+ declare class RealtimeVision {
+   private config;
+   private client;
+   private logger;
+   private mediaStream;
+   private peerConnection;
+   private webSocket;
+   private streamId;
+   private keepaliveInterval;
+   private videoElement;
+   private isRunning;
+   constructor(config: RealtimeVisionConfig);
+   /**
+    * Validate configuration values
+    */
+   private validateConfig;
+   /**
+    * Create media stream from the configured source
+    */
+   private createMediaStream;
+   /**
+    * Get FPS from media stream
+    */
+   private getStreamFps;
+   /**
+    * Get processing configuration with defaults applied
+    */
+   private getProcessingConfig;
+   /**
+    * Get the effective source configuration
+    */
+   private getSource;
+   /**
+    * Start the vision stream
+    */
+   start(): Promise<void>;
+   /**
+    * Set up keepalive interval with error handling
+    */
+   private setupKeepalive;
+   /**
+    * Set up WebSocket connection with error handling
+    */
+   private setupWebSocket;
+   /**
+    * Handle non-fatal errors (report but don't stop stream)
+    */
+   private handleNonFatalError;
+   /**
+    * Handle fatal errors (stop stream and report)
+    */
+   private handleFatalError;
+   /**
+    * Update the prompt/task while stream is running
+    */
+   updatePrompt(prompt: string): Promise<void>;
+   /**
+    * Stop the vision stream and clean up resources
+    */
+   stop(): Promise<void>;
+   /**
+    * Submit feedback for the stream
+    */
+   submitFeedback(feedback: {
+     rating: number;
+     category: string;
+     feedback?: string;
+   }): Promise<void>;
+   /**
+    * Get the current stream ID
+    */
+   getStreamId(): string | null;
+   /**
+    * Get the media stream (for displaying video preview)
+    */
+   getMediaStream(): MediaStream | null;
+   /**
+    * Check if the stream is running
+    */
+   isActive(): boolean;
+   private cleanup;
+ }
+
+ declare class ApiError extends Error {
+   readonly statusCode?: number;
+   readonly requestId?: string;
+   readonly details?: any;
+   constructor(message: string, statusCode?: number, requestId?: string, details?: any);
+ }
+ declare class UnauthorizedError extends ApiError {
+   constructor(message: string, requestId?: string);
+ }
+ declare class ValidationError extends ApiError {
+   constructor(message: string, requestId?: string, details?: any);
+ }
+ declare class NotFoundError extends ApiError {
+   constructor(message: string, requestId?: string);
+ }
+ declare class NetworkError extends ApiError {
+   readonly cause?: Error;
+   constructor(message: string, cause?: Error);
+ }
+ declare class ServerError extends ApiError {
+   constructor(message: string, requestId?: string, details?: any);
+ }
+
+ export { ApiError, type ErrorResponse, type FeedbackCreateRequest, type FeedbackResponse, type KeepaliveResponse, NetworkError, NotFoundError, RealtimeVision, type RealtimeVisionConfig, ServerError, type StatusResponse, StreamClient, type StreamClientMeta, type StreamConfigResponse, type StreamCreateRequest, type StreamCreateResponse, type StreamInferenceConfig, type StreamInferenceResult, type StreamProcessingConfig, type StreamSource, UnauthorizedError, ValidationError, type WebRtcAnswer, type WebRtcOffer };