@aithos/sdk 0.1.0-alpha.17 → 0.1.0-alpha.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -118,6 +118,75 @@ export interface InvokeImageResult {
118
118
  /** Audit log id for traceability. */
119
119
  readonly auditId: string;
120
120
  }
121
+ export interface InvokeBedrockVisionArgs { // Arguments accepted by ComputeNamespace.invokeBedrockVision.
122
+ readonly mandateId?: string; // Optional mandate id; invokeBedrockVision hands it to its signer / wire-mandate resolution.
123
+ /**
124
+ * Model id. Defaults to `claude-sonnet-4-6` (Sonnet 4.6) when omitted — it's vision-capable and
125
+ * returns reliable structured JSON when prompted.
126
+ */
127
+ readonly model?: string;
128
+ /** Source image — Blob (recommended) or raw base64. A Blob is base64-encoded by the SDK before sending. */
129
+ readonly image: Blob | {
130
+ readonly base64: string; // Already-encoded image bytes; sent as-is in `image_base64`.
131
+ readonly contentType: string; // Sent as-is in `image_content_type` (for a Blob, `Blob.type` is used, falling back to "image/png").
132
+ };
133
+ /** Text prompt accompanying the image. */
134
+ readonly prompt: string;
135
+ /** Optional system prompt. Only sent when provided. */
136
+ readonly system?: string;
137
+ readonly maxTokens?: number; // Sent as `max_tokens` only when provided; no SDK-side default.
138
+ readonly temperature?: number; // Sent as `temperature` only when provided; no SDK-side default.
139
+ readonly idempotencyKey?: string; // When omitted, the SDK generates a key for the request.
140
+ readonly signal?: AbortSignal; // Passed through to the signed POST call.
141
+ }
142
+ export interface InvokeBedrockVisionResult { // Resolved value of ComputeNamespace.invokeBedrockVision.
143
+ readonly content: string; // The model's text response (the call is image + text → text).
144
+ readonly stopReason: StopReason; // StopReason is declared elsewhere in this module.
145
+ readonly usage: { // Token accounting for the call.
146
+ readonly inputTokens: number;
147
+ readonly outputTokens: number;
148
+ };
149
+ readonly creditsCharged: number; // NOTE(review): presumably the credits billed for this call — confirm unit with the service.
150
+ readonly walletBalance: number; // NOTE(review): presumably the balance after the charge — confirm.
151
+ readonly auditId: string; // Audit log id for traceability (same wording as InvokeImageResult.auditId).
152
+ }
153
+ export interface InvokeSegmentationArgs { // Arguments accepted by ComputeNamespace.invokeSegmentation.
154
+ /** Mandate id (optional for owner sessions — see InvokeImageArgs). */
155
+ readonly mandateId?: string;
156
+ /** Source image. Blob (recommended) or raw base64. A Blob is base64-encoded by the SDK before sending. */
157
+ readonly image: Blob | {
158
+ readonly base64: string; // Already-encoded image bytes; sent as-is in `image_base64`.
159
+ readonly contentType: string; // Sent as-is in `image_content_type` (for a Blob, `Blob.type` is used, falling back to "image/png").
160
+ };
161
+ /**
162
+ * Text phrase describing what to segment (sent as `text_input`). Florence-2 is robust with
163
+ * natural-language descriptions: "the torso of the robot", "the
164
+ * dog's head", "the chest of the character".
165
+ */
166
+ readonly textInput: string;
167
+ readonly idempotencyKey?: string; // When omitted, the SDK generates a key for the request.
168
+ readonly signal?: AbortSignal; // Passed through to the signed POST call.
169
+ }
170
+ export interface SegmentPolygon { // One polygon from a segmentation result, as a read-only list of vertices.
171
+ readonly points: ReadonlyArray<{ // NOTE(review): coordinate space (pixels vs. normalized) is not visible here — confirm.
172
+ readonly x: number;
173
+ readonly y: number;
174
+ }>;
175
+ }
176
+ export interface InvokeSegmentationResult { // Resolved value of ComputeNamespace.invokeSegmentation.
177
+ /** All polygons Florence-2 returned (typically 1, sometimes a few when the prompt matches multiple regions). */
178
+ readonly polygons: readonly SegmentPolygon[];
179
+ /** Bounding box of the first polygon, for callers that only need a coarse target. May be null. */
180
+ readonly bbox: {
181
+ readonly left: number;
182
+ readonly top: number;
183
+ readonly right: number;
184
+ readonly bottom: number;
185
+ } | null; // NOTE(review): presumably null when no polygon was returned — confirm against the service.
186
+ readonly creditsCharged: number; // NOTE(review): presumably the credits billed for this call — confirm unit.
187
+ readonly walletBalance: number; // NOTE(review): presumably the balance after the charge — confirm.
188
+ readonly auditId: string; // Audit log id for traceability (same wording as InvokeImageResult.auditId).
189
+ }
121
190
  export interface ComputeNamespaceDeps {
122
191
  readonly auth: AithosAuth;
123
192
  readonly appDid: string;
@@ -162,6 +231,16 @@ export declare class ComputeNamespace {
162
231
  * `mandate_revoked`, `insufficient_credits`, …).
163
232
  */
164
233
  invokeBedrock(args: InvokeBedrockArgs): Promise<InvokeBedrockResult>;
234
+ /**
235
+ * Multimodal Bedrock invoke — image + text → text response.
236
+ * Default model: `claude-sonnet-4-6` (vision-capable, reliable JSON).
237
+ *
238
+ * Use when you need a VLM to reason about an image: locating
239
+ * features, structured extraction, semantic Q&A. Prompt the model
240
+ * to return JSON if you need structured output (the API itself is
241
+ * unstructured). Resolves to the response text plus stop reason, token usage, billing fields and an audit id.
242
+ */
243
+ invokeBedrockVision(args: InvokeBedrockVisionArgs): Promise<InvokeBedrockVisionResult>;
165
244
  /**
166
245
  * Generate one or more images through the Aithos compute proxy
167
246
  * (currently powered by fal.ai FLUX models). Spec mirror of
@@ -182,5 +261,18 @@ export declare class ComputeNamespace {
182
261
  * - flux-pro-1.1-ultra: 60 000 mc + fee per image
183
262
  */
184
263
  invokeImage(args: InvokeImageArgs): Promise<InvokeImageResult>;
264
+ /**
265
+ * Run text-prompted segmentation (Florence-2 referring-expression)
266
+ * on a source image. Returns one or more polygons hugging the
267
+ * region matching the text prompt, plus a bounding box that may be null.
268
+ *
269
+ * Use cases: locate the chest/torso area of a generated mascot
270
+ * for logo compositing, find the face zone for a thumbnail crop,
271
+ * extract a product from a marketing shot — anything that needs
272
+ * a precise mask + bbox from a natural-language description.
273
+ *
274
+ * Pricing: flat 5 000 mc per call (~$0.005 — Florence-2 is cheap).
275
+ */
276
+ invokeSegmentation(args: InvokeSegmentationArgs): Promise<InvokeSegmentationResult>;
185
277
  }
186
278
  //# sourceMappingURL=compute.d.ts.map
@@ -86,6 +86,56 @@ export class ComputeNamespace {
86
86
  signal: args.signal,
87
87
  });
88
88
  }
89
+ /**
90
+ * Multimodal Bedrock invoke — image + text → text response.
91
+ * Default model: `claude-sonnet-4-6` (vision-capable, reliable JSON).
92
+ *
93
+ * Use when you need a VLM to reason about an image: locating
94
+ * features, structured extraction, semantic Q&A. Prompt the model
95
+ * to return JSON if you need structured output (the API itself is
96
+ * unstructured). The image is normalized to base64 + content type, and an idempotency key is generated when none is supplied.
97
+ */
98
+ async invokeBedrockVision(args) {
99
+ const { endpoints, fetch: fetchImpl } = this.#deps;
100
+ const choice = this.#resolveSigner(args.mandateId); // signer selection; helper is defined elsewhere in this class
101
+ let imageBase64;
102
+ let imageContentType;
103
+ if ("base64" in args.image) { // caller supplied pre-encoded base64 plus its content type: use both verbatim
104
+ imageBase64 = args.image.base64;
105
+ imageContentType = args.image.contentType;
106
+ }
107
+ else { // otherwise treat the input as a Blob: read its bytes and encode them here
108
+ const buf = await args.image.arrayBuffer();
109
+ imageBase64 = arrayBufferToBase64(buf);
110
+ imageContentType = args.image.type || "image/png"; // Blob.type is "" when unknown, so `||` (not `??`) is what triggers the PNG fallback
111
+ }
112
+ const url = computeInvokeUrl(endpoints);
113
+ const idempotencyKey = args.idempotencyKey ?? generateIdempotencyKey(); // caller's key wins; otherwise generate one
114
+ const model = args.model ?? "claude-sonnet-4-6"; // default model when the caller does not pick one
115
+ const params = { // request payload with snake_case keys; required fields first
116
+ app_did: this.#deps.appDid,
117
+ mandate_id: this.#resolveMandateIdForWire(args.mandateId, choice),
118
+ model,
119
+ image_base64: imageBase64,
120
+ image_content_type: imageContentType,
121
+ prompt: args.prompt,
122
+ idempotency_key: idempotencyKey,
123
+ };
124
+ if (args.system !== undefined) // optional fields are added only when provided, so the payload never carries undefined values
125
+ params.system = args.system;
126
+ if (args.maxTokens !== undefined)
127
+ params.max_tokens = args.maxTokens;
128
+ if (args.temperature !== undefined)
129
+ params.temperature = args.temperature;
130
+ return await this.#signAndPost({ // sign with the chosen signer and POST; the result is returned unchanged
131
+ url,
132
+ method: "aithos.compute_invoke_bedrock_vision",
133
+ params,
134
+ choice,
135
+ fetchImpl,
136
+ signal: args.signal,
137
+ });
138
+ }
89
139
  /**
90
140
  * Generate one or more images through the Aithos compute proxy
91
141
  * (currently powered by fal.ai FLUX models). Spec mirror of
@@ -138,6 +188,52 @@ export class ComputeNamespace {
138
188
  signal: args.signal,
139
189
  });
140
190
  }
191
+ /**
192
+ * Run text-prompted segmentation (Florence-2 referring-expression)
193
+ * on a source image. Returns one or more polygons hugging the
194
+ * region matching the text prompt.
195
+ *
196
+ * Use cases: locate the chest/torso area of a generated mascot
197
+ * for logo compositing, find the face zone for a thumbnail crop,
198
+ * extract a product from a marketing shot — anything that needs
199
+ * a precise mask + bbox from a natural-language description.
200
+ *
201
+ * Pricing: flat 5 000 mc per call (~$0.005 — Florence-2 is cheap).
202
+ */
203
+ async invokeSegmentation(args) {
204
+ const { endpoints, fetch: fetchImpl } = this.#deps;
205
+ const choice = this.#resolveSigner(args.mandateId); // signer selection; helper is defined elsewhere in this class
206
+ // Normalize image input to base64 + content type.
207
+ let imageBase64;
208
+ let imageContentType;
209
+ if ("base64" in args.image) { // caller supplied pre-encoded base64 plus its content type: use both verbatim
210
+ imageBase64 = args.image.base64;
211
+ imageContentType = args.image.contentType;
212
+ }
213
+ else { // otherwise treat the input as a Blob: read its bytes and encode them here
214
+ const buf = await args.image.arrayBuffer();
215
+ imageBase64 = arrayBufferToBase64(buf);
216
+ imageContentType = args.image.type || "image/png"; // Blob.type is "" when unknown, so `||` (not `??`) is what triggers the PNG fallback
217
+ }
218
+ const url = computeInvokeUrl(endpoints);
219
+ const idempotencyKey = args.idempotencyKey ?? generateIdempotencyKey(); // caller's key wins; otherwise generate one
220
+ const params = { // request payload with snake_case keys; every field is always present
221
+ app_did: this.#deps.appDid,
222
+ mandate_id: this.#resolveMandateIdForWire(args.mandateId, choice),
223
+ image_base64: imageBase64,
224
+ image_content_type: imageContentType,
225
+ text_input: args.textInput,
226
+ idempotency_key: idempotencyKey,
227
+ };
228
+ return await this.#signAndPost({ // sign with the chosen signer and POST; the result is returned unchanged
229
+ url,
230
+ method: "aithos.compute_invoke_segmentation",
231
+ params,
232
+ choice,
233
+ fetchImpl,
234
+ signal: args.signal,
235
+ });
236
+ }
141
237
  /**
142
238
  * Resolve the active signer (owner takes precedence over delegate).
143
239
  *
@@ -267,4 +363,19 @@ function generateIdempotencyKey() {
267
363
  }
268
364
  return hex;
269
365
  }
366
+ /**
367
+ * Encode an ArrayBuffer as base64 in environments where `Buffer` is
368
+ * not available (browser). Uses btoa over a binary string — safe for
369
+ * the small payload sizes the SDK deals with (≤ a few MB). Returns "" for an empty buffer.
370
+ */
371
+ function arrayBufferToBase64(buf) {
372
+ const bytes = new Uint8Array(buf);
373
+ let bin = ""; // binary string: one character per byte (code units 0–255), the form btoa expects
374
+ // Process in chunks to avoid stack overflow on String.fromCharCode.apply
375
+ const CHUNK = 0x8000; // 32 768 bytes per apply() call
376
+ for (let i = 0; i < bytes.length; i += CHUNK) {
377
+ bin += String.fromCharCode.apply(null, Array.from(bytes.subarray(i, i + CHUNK))); // subarray clamps the end index, so the last chunk may be shorter
378
+ }
379
+ return btoa(bin);
380
+ }
270
381
  //# sourceMappingURL=compute.js.map
@@ -1,11 +1,11 @@
1
- export declare const VERSION = "0.1.0-alpha.17";
1
+ export declare const VERSION = "0.1.0-alpha.19";
2
2
  export { AithosSDK } from "./sdk.js";
3
3
  export type { AithosSDKConfig } from "./types.js";
4
4
  export { AithosSDKError } from "./types.js";
5
5
  export { AithosRpcError } from "@aithos/protocol-client";
6
6
  export type { AithosSdkEndpoints } from "./endpoints.js";
7
7
  export { DEFAULT_SDK_ENDPOINTS } from "./endpoints.js";
8
- export type { ComputeMessage, ImageAspectRatio, ImageModelId, InvokeBedrockArgs, InvokeBedrockResult, InvokeImageArgs, InvokeImageImage, InvokeImageResult, StopReason, } from "./compute.js";
8
+ export type { ComputeMessage, ImageAspectRatio, ImageModelId, InvokeBedrockArgs, InvokeBedrockResult, InvokeBedrockVisionArgs, InvokeBedrockVisionResult, InvokeImageArgs, InvokeImageImage, InvokeImageResult, InvokeSegmentationArgs, InvokeSegmentationResult, SegmentPolygon, StopReason, } from "./compute.js";
9
9
  export { ComputeNamespace } from "./compute.js";
10
10
  export type { CreditPackId, CreateTopupSessionArgs, CreateTopupSessionResult, GetBalanceArgs, GetBalanceResult, } from "./wallet.js";
11
11
  export { WalletNamespace } from "./wallet.js";
package/dist/src/index.js CHANGED
@@ -17,7 +17,7 @@
17
17
  // Public types specific to the SDK (`AithosSDKConfig`, `AithosSDKError`)
18
18
  // are exported from here. Endpoint config (`AithosSdkEndpoints`,
19
19
  // `DEFAULT_SDK_ENDPOINTS`) likewise.
20
- export const VERSION = "0.1.0-alpha.17";
20
+ export const VERSION = "0.1.0-alpha.19";
21
21
  export { AithosSDK } from "./sdk.js";
22
22
  export { AithosSDKError } from "./types.js";
23
23
  // Re-export protocol-client's JSON-RPC error type so consumers can
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aithos/sdk",
3
- "version": "0.1.0-alpha.17",
3
+ "version": "0.1.0-alpha.19",
4
4
  "description": "Aithos SDK — high-level TypeScript developer kit for building agentic apps on the Aithos protocol. Wraps @aithos/protocol-client and exposes the Aithos compute proxy and wallet (Stripe top-up) endpoints.",
5
5
  "keywords": [
6
6
  "aithos",