quantum-ai-sdk 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,130 @@
1
+ import type { QuantumClient } from "./client.js";
2
+ /** Optional domain context sent with a vision request for relevance analysis; every field may be omitted. */
3
+ export interface VisionContext {
4
+ /** Installation type (e.g. "solar", "heat_pump", "ev_charger"); sent on the wire as `installation_type`. */
5
+ installationType?: string;
6
+ /** Phase (e.g. "pre_install", "installation", "post_install"); sent on the wire as `phase`. */
7
+ phase?: string;
8
+ /** Expected items for relevance checking; sent on the wire as `expected_items`. */
9
+ expectedItems?: string[];
10
+ }
11
+ /** Request body for vision analysis endpoints. Every field is optional at the type level; fields left undefined are omitted from the JSON payload. */
12
+ export interface VisionRequest {
13
+ /** Base64-encoded image (with or without data: prefix); sent on the wire as `image_base64`. */
14
+ imageBase64?: string;
15
+ /** Image URL (fetched by the model provider); sent on the wire as `image_url`. */
16
+ imageUrl?: string;
17
+ /** Model to use. Default: gemini-2.5-flash. The SDK sends no `model` key when this is undefined. */
18
+ model?: string;
19
+ /** Analysis profile: "combined" (default), "scene", "objects", "ocr", "quality". Not validated by the SDK. */
20
+ profile?: string;
21
+ /** Domain context for relevance checking; left out of the payload when not set. */
22
+ context?: VisionContext;
23
+ }
24
+ /** A detected object with its label, confidence and bounding box. */
25
+ export interface DetectedObject {
26
+ /** Object label; "" when the response omits it. */
27
+ label: string;
28
+ /** Detection confidence (0.0 - 1.0); 0 when the response omits it. */
29
+ confidence: number;
30
+ /** Bounding box: [y_min, x_min, y_max, x_max] normalised to 0-1000; [0, 0, 0, 0] when the response omits it. */
31
+ boundingBox: [number, number, number, number];
32
+ }
33
+ /** Image quality assessment. When the response omits a field, string fields are "", `score` is 0 and `issues` is []. */
34
+ export interface QualityAssessment {
35
+ /** Overall rating: "good", "acceptable", "poor". */
36
+ overall: string;
37
+ /** Quality score (0.0 - 1.0). */
38
+ score: number;
39
+ /** Blur level: "none", "slight", "significant". */
40
+ blur: string;
41
+ /** Lighting level (the field is named `darkness`): "well_lit", "dim", "dark". */
42
+ darkness: string;
43
+ /** Resolution: "high", "adequate", "low". */
44
+ resolution: string;
45
+ /** Exposure: "correct", "over", "under". */
46
+ exposure: string;
47
+ /** Specific issues found; empty when none were reported. */
48
+ issues: string[];
49
+ }
50
+ /** Relevance check against expected content. List fields default to [] when the response omits them. */
51
+ export interface RelevanceCheck {
52
+ /** Whether the image is relevant to the context; false when the response omits it. */
53
+ relevant: boolean;
54
+ /** Relevance score (0.0 - 1.0); 0 when the response omits it. */
55
+ score: number;
56
+ /** Items expected based on context (wire key `expected_items`). */
57
+ expectedItems: string[];
58
+ /** Items actually found in the image (wire key `found_items`). */
59
+ foundItems: string[];
60
+ /** Expected but not found (wire key `missing_items`). */
61
+ missingItems: string[];
62
+ /** Found but not expected (wire key `unexpected_items`). */
63
+ unexpectedItems: string[];
64
+ /** Additional notes, passed through unchanged from the response. */
65
+ notes?: string;
66
+ }
67
+ /** A detected text region in the image, as listed in `OcrResult.overlays`. */
68
+ export interface TextOverlay {
69
+ /** Extracted text content; "" when the response omits it. */
70
+ text: string;
71
+ /** Bounding box: [y_min, x_min, y_max, x_max] normalised to 0-1000; undefined when the response has no `bounding_box`. */
72
+ boundingBox?: [number, number, number, number];
73
+ /** Overlay type: "gps", "timestamp", "address", "label", "other". */
74
+ type?: string;
75
+ }
76
+ /** OCR / text extraction result; present on a response only when the wire payload has an `ocr` section. */
77
+ export interface OcrResult {
78
+ /** All extracted text concatenated. */
79
+ text?: string;
80
+ /** Extracted metadata (GPS, timestamp, address, etc.); {} when the response omits it. */
81
+ metadata: Record<string, string>;
82
+ /** Individual text overlays with positions; [] when the response omits them. */
83
+ overlays: TextOverlay[];
84
+ }
85
+ /** Full vision analysis response, shared by every vision endpoint; sections absent from the response are undefined. */
86
+ export interface VisionResponse {
87
+ /** Scene description. */
88
+ caption?: string;
89
+ /** Suggested tags (lowercase_snake_case); [] when the response omits them. */
90
+ tags: string[];
91
+ /** Detected objects with bounding boxes; [] when the response omits them. */
92
+ objects: DetectedObject[];
93
+ /** Image quality assessment, when the response includes one. */
94
+ quality?: QualityAssessment;
95
+ /** Relevance check against context, when the response includes one. */
96
+ relevance?: RelevanceCheck;
97
+ /** Extracted text and overlay metadata, when the response includes them. */
98
+ ocr?: OcrResult;
99
+ /** Model used; "" when the response omits it. */
100
+ model: string;
101
+ /** Cost in ticks (wire key `cost_ticks`); 0 when the response omits it. */
102
+ costTicks: number;
103
+ /** Request identifier (wire key `request_id`); "" when the response omits it. */
104
+ requestId: string;
105
+ }
106
+ /**
107
+ * Full combined vision analysis (scene + objects + quality + OCR + relevance). POSTs the request to /qai/v1/vision/analyze and resolves with the response converted to camelCase.
108
+ * @internal
109
+ */
110
+ export declare function visionAnalyze(client: QuantumClient, req: VisionRequest): Promise<VisionResponse>;
111
+ /**
112
+ * Object detection with bounding boxes. POSTs the request to /qai/v1/vision/detect and resolves with the response converted to camelCase.
113
+ * @internal
114
+ */
115
+ export declare function visionDetect(client: QuantumClient, req: VisionRequest): Promise<VisionResponse>;
116
+ /**
117
+ * Scene description and tags. POSTs the request to /qai/v1/vision/describe and resolves with the response converted to camelCase.
118
+ * @internal
119
+ */
120
+ export declare function visionDescribe(client: QuantumClient, req: VisionRequest): Promise<VisionResponse>;
121
+ /**
122
+ * Text extraction and overlay metadata (OCR). POSTs the request to /qai/v1/vision/ocr and resolves with the response converted to camelCase.
123
+ * @internal
124
+ */
125
+ export declare function visionOcr(client: QuantumClient, req: VisionRequest): Promise<VisionResponse>;
126
+ /**
127
+ * Image quality assessment. POSTs the request to /qai/v1/vision/quality and resolves with the response converted to camelCase.
128
+ * @internal
129
+ */
130
+ export declare function visionQuality(client: QuantumClient, req: VisionRequest): Promise<VisionResponse>;
package/dist/vision.js ADDED
@@ -0,0 +1,124 @@
1
+ // ── Wire format (snake_case JSON) ────────────────────────────────
2
/**
 * @internal Build the snake_case JSON body for a vision request.
 *
 * Only fields whose value is not `undefined` are copied, so unset options
 * never appear in the payload. A truthy `context` always produces a
 * `context` object, even when none of its fields are set.
 */
function toWire(req) {
    // Keep only the [key, value] pairs that actually carry a value.
    const defined = (pairs) => Object.fromEntries(pairs.filter(([, value]) => value !== undefined));
    const body = defined([
        ["image_base64", req.imageBase64],
        ["image_url", req.imageUrl],
        ["model", req.model],
        ["profile", req.profile],
    ]);
    const context = req.context;
    if (context) {
        body.context = defined([
            ["installation_type", context.installationType],
            ["phase", context.phase],
            ["expected_items", context.expectedItems],
        ]);
    }
    return body;
}
25
/**
 * @internal Convert a snake_case wire response to the camelCase response shape.
 *
 * Required fields are defaulted when the payload omits them: strings to "",
 * numbers to 0, lists to [], `relevant` to false, `metadata` to {} and an
 * object's `boundingBox` to [0, 0, 0, 0]. The optional `quality`,
 * `relevance` and `ocr` sections stay `undefined` when the payload does not
 * carry them.
 *
 * A `null`/`undefined` payload, and `null` entries inside `objects` or
 * `ocr.overlays`, are treated as empty objects rather than throwing, so a
 * sparse or empty body still yields a fully defaulted response.
 *
 * @param raw - Parsed JSON response body using snake_case keys.
 * @returns The response with camelCase keys and defaults applied.
 */
function fromWire(raw) {
    // Treat a missing body like an empty one instead of throwing on property access.
    const body = raw ?? {};
    // `?.` keeps a null list entry from aborting the whole conversion.
    const objects = (body.objects ?? []).map((o) => ({
        label: o?.label ?? "",
        confidence: o?.confidence ?? 0,
        boundingBox: o?.bounding_box ?? [0, 0, 0, 0],
    }));
    let quality;
    if (body.quality) {
        const q = body.quality;
        quality = {
            overall: q.overall ?? "",
            score: q.score ?? 0,
            blur: q.blur ?? "",
            darkness: q.darkness ?? "",
            resolution: q.resolution ?? "",
            exposure: q.exposure ?? "",
            issues: q.issues ?? [],
        };
    }
    let relevance;
    if (body.relevance) {
        const r = body.relevance;
        relevance = {
            relevant: r.relevant ?? false,
            score: r.score ?? 0,
            expectedItems: r.expected_items ?? [],
            foundItems: r.found_items ?? [],
            missingItems: r.missing_items ?? [],
            unexpectedItems: r.unexpected_items ?? [],
            notes: r.notes,
        };
    }
    let ocr;
    if (body.ocr) {
        const o = body.ocr;
        const overlays = (o.overlays ?? []).map((ov) => ({
            text: ov?.text ?? "",
            boundingBox: ov?.bounding_box,
            type: ov?.type,
        }));
        ocr = {
            text: o.text,
            metadata: o.metadata ?? {},
            overlays,
        };
    }
    return {
        caption: body.caption,
        tags: body.tags ?? [],
        objects,
        quality,
        relevance,
        ocr,
        model: body.model ?? "",
        costTicks: body.cost_ticks ?? 0,
        requestId: body.request_id ?? "",
    };
}
84
+ // ── Client methods ───────────────────────────────────────────────
85
/**
 * Run the full combined vision analysis (scene + objects + quality + OCR + relevance).
 * Sends the wire-format request to the analyze endpoint and returns the
 * response converted to camelCase.
 * @internal
 */
export async function visionAnalyze(client, req) {
    const endpoint = "/qai/v1/vision/analyze";
    const response = await client._doJSON("POST", endpoint, toWire(req));
    return fromWire(response.data);
}
93
/**
 * Detect objects and their bounding boxes.
 * Sends the wire-format request to the detect endpoint and returns the
 * response converted to camelCase.
 * @internal
 */
export async function visionDetect(client, req) {
    const endpoint = "/qai/v1/vision/detect";
    const response = await client._doJSON("POST", endpoint, toWire(req));
    return fromWire(response.data);
}
101
/**
 * Describe the scene and suggest tags.
 * Sends the wire-format request to the describe endpoint and returns the
 * response converted to camelCase.
 * @internal
 */
export async function visionDescribe(client, req) {
    const endpoint = "/qai/v1/vision/describe";
    const response = await client._doJSON("POST", endpoint, toWire(req));
    return fromWire(response.data);
}
109
/**
 * Extract text and overlay metadata (OCR).
 * Sends the wire-format request to the OCR endpoint and returns the
 * response converted to camelCase.
 * @internal
 */
export async function visionOcr(client, req) {
    const endpoint = "/qai/v1/vision/ocr";
    const response = await client._doJSON("POST", endpoint, toWire(req));
    return fromWire(response.data);
}
117
/**
 * Assess image quality.
 * Sends the wire-format request to the quality endpoint and returns the
 * response converted to camelCase.
 * @internal
 */
export async function visionQuality(client, req) {
    const endpoint = "/qai/v1/vision/quality";
    const response = await client._doJSON("POST", endpoint, toWire(req));
    return fromWire(response.data);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "quantum-ai-sdk",
3
- "version": "0.4.0",
3
+ "version": "0.6.0",
4
4
  "description": "Cosmic Duck SDK — 100+ AI endpoints across 10 providers",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",