@providerprotocol/ai 0.0.25 → 0.0.27

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,4 +1,4 @@
-import { g as Provider } from '../provider-x4RocsnK.js';
+import { e as Provider } from '../provider-6-mJYOOl.js';
 
 /**
  * @fileoverview Anthropic API type definitions.
@@ -1,4 +1,4 @@
-import { C as ContentBlock, m as ImageBlock, n as AudioBlock, V as VideoBlock, R as ReasoningBlock, A as AssistantContent, U as UserContent } from './provider-x4RocsnK.js';
+import { C as ContentBlock, k as ImageBlock, l as AudioBlock, V as VideoBlock, R as ReasoningBlock, A as AssistantContent, U as UserContent, P as ProviderIdentity, a as ProviderConfig, N as EmbeddingInput, J as EmbeddingUsage, D as BoundEmbeddingModel } from './provider-6-mJYOOl.js';
 
 /**
  * @fileoverview JSON Schema types for tool parameters and structured outputs.
@@ -1077,4 +1077,159 @@ declare function contentBlockStart(index: number): StreamEvent;
  */
 declare function contentBlockStop(index: number): StreamEvent;
 
-export { AssistantMessage as A, type BeforeCallResult as B, toolCallDelta as C, messageStart as D, type EventDelta as E, messageStop as F, contentBlockStart as G, contentBlockStop as H, type TurnJSON as I, type JSONSchema as J, Message as M, type StreamResult as S, type Turn as T, UserMessage as U, type MessageType as a, type MessageJSON as b, type Tool as c, type ToolUseStrategy as d, type TokenUsage as e, type StreamEvent as f, type JSONSchemaProperty as g, type JSONSchemaPropertyType as h, type ToolCall as i, type ToolResult as j, type ToolMetadata as k, type AfterCallResult as l, type ToolExecution as m, ToolResultMessage as n, MessageRole as o, isUserMessage as p, isAssistantMessage as q, isToolResultMessage as r, type MessageMetadata as s, type MessageOptions as t, createTurn as u, emptyUsage as v, aggregateUsage as w, StreamEventType as x, createStreamResult as y, textDelta as z };
+/**
+ * @fileoverview Embedding types for vector embedding generation.
+ *
+ * Defines the interfaces for configuring and executing embedding operations,
+ * including options, instances, requests, responses, and streaming progress.
+ *
+ * @module types/embedding
+ */
+
+/**
+ * Structural type for embedding model input.
+ * Uses structural typing to avoid generic variance issues with Provider generics.
+ *
+ * @remarks
+ * This type mirrors {@link ModelReference} while keeping provider options
+ * structurally compatible across providers.
+ *
+ * @see ModelReference
+ */
+interface EmbeddingModelInput {
+    readonly modelId: string;
+    readonly provider: ProviderIdentity;
+    /** Optional provider configuration merged into requests */
+    readonly providerConfig?: Partial<ProviderConfig>;
+}
+/**
+ * Options for creating an embedding instance with the embedding() function.
+ *
+ * @typeParam TParams - Provider-specific parameter type
+ *
+ * @example
+ * ```typescript
+ * const options: EmbeddingOptions<OpenAIEmbedParams> = {
+ *   model: openai('text-embedding-3-large'),
+ *   config: { apiKey: process.env.OPENAI_API_KEY },
+ *   params: { dimensions: 1536 }
+ * };
+ * ```
+ */
+interface EmbeddingOptions<TParams = unknown> {
+    /** A model reference from a provider factory */
+    model: EmbeddingModelInput;
+    /** Provider infrastructure configuration */
+    config?: ProviderConfig;
+    /** Provider-specific parameters (passed through unchanged) */
+    params?: TParams;
+}
+/**
+ * Options for embed() calls.
+ */
+interface EmbedOptions {
+    /**
+     * Enable chunked processing with progress for large input sets.
+     * When true, returns EmbeddingStream instead of Promise.
+     */
+    chunked?: boolean;
+    /** Inputs per batch when chunked (default: provider max) */
+    batchSize?: number;
+    /** Concurrent batch limit when chunked (default: 1) */
+    concurrency?: number;
+    /** Abort signal for cancellation */
+    signal?: AbortSignal;
+}
+/**
+ * Single embedding vector result.
+ */
+interface Embedding {
+    /** The embedding vector */
+    vector: number[];
+    /** Vector dimensionality */
+    dimensions: number;
+    /** Index corresponding to input array position */
+    index: number;
+    /** Token count for this input (if provider reports) */
+    tokens?: number;
+    /** Provider-specific per-embedding metadata */
+    metadata?: Record<string, unknown>;
+}
+/**
+ * Result from embed() call.
+ */
+interface EmbeddingResult {
+    /** Embeddings in same order as inputs */
+    embeddings: Embedding[];
+    /** Usage statistics */
+    usage: EmbeddingUsage;
+    /** Provider-specific response metadata */
+    metadata?: Record<string, unknown>;
+}
+/**
+ * Progress update when using chunked mode.
+ */
+interface EmbeddingProgress {
+    /** Embeddings from the latest batch */
+    embeddings: Embedding[];
+    /** Total embeddings completed so far */
+    completed: number;
+    /** Total number of inputs */
+    total: number;
+    /** Percentage complete (0-100) */
+    percent: number;
+}
+/**
+ * Async iterable stream with final result accessor.
+ * Returned when embed() is called with { chunked: true }.
+ */
+interface EmbeddingStream extends AsyncIterable<EmbeddingProgress> {
+    /** Promise resolving to complete result after iteration */
+    readonly result: Promise<EmbeddingResult>;
+    /** Abort the operation */
+    abort(): void;
+}
+/**
+ * Embedding instance returned by the embedding() function.
+ *
+ * @typeParam TParams - Provider-specific parameter type
+ *
+ * @example
+ * ```typescript
+ * const embedder = embedding({ model: openai('text-embedding-3-large') });
+ *
+ * // Single input
+ * const result = await embedder.embed('Hello world');
+ *
+ * // Batch input
+ * const batch = await embedder.embed(['doc1', 'doc2', 'doc3']);
+ *
+ * // Large-scale with progress
+ * const stream = embedder.embed(documents, { chunked: true });
+ * for await (const progress of stream) {
+ *   console.log(`${progress.percent}% complete`);
+ * }
+ * ```
+ */
+interface EmbeddingInstance<TParams = unknown> {
+    /**
+     * Generate embeddings for one or more inputs.
+     *
+     * @param input - Single input or array of inputs
+     * @param options - Optional embed options
+     * @returns Promise<EmbeddingResult> or EmbeddingStream if chunked
+     */
+    embed(input: EmbeddingInput | EmbeddingInput[], options?: EmbedOptions & {
+        chunked?: false;
+    }): Promise<EmbeddingResult>;
+    embed(input: EmbeddingInput[], options: EmbedOptions & {
+        chunked: true;
+    }): EmbeddingStream;
+    embed(input: EmbeddingInput | EmbeddingInput[], options?: EmbedOptions): Promise<EmbeddingResult> | EmbeddingStream;
+    /** The bound embedding model */
+    readonly model: BoundEmbeddingModel<TParams>;
+    /** Current parameters */
+    readonly params: TParams | undefined;
+}
+
+export { AssistantMessage as A, type BeforeCallResult as B, createStreamResult as C, textDelta as D, type EmbeddingOptions as E, toolCallDelta as F, messageStart as G, messageStop as H, contentBlockStart as I, type JSONSchema as J, contentBlockStop as K, type EmbedOptions as L, Message as M, type Embedding as N, type EmbeddingResult as O, type EmbeddingProgress as P, type EmbeddingStream as Q, type EmbeddingModelInput as R, type StreamResult as S, type Turn as T, UserMessage as U, type TurnJSON as V, type MessageType as a, type MessageJSON as b, type Tool as c, type ToolUseStrategy as d, type TokenUsage as e, type StreamEvent as f, type EmbeddingInstance as g, type JSONSchemaProperty as h, type JSONSchemaPropertyType as i, type ToolCall as j, type ToolResult as k, type ToolMetadata as l, type AfterCallResult as m, type ToolExecution as n, ToolResultMessage as o, MessageRole as p, isUserMessage as q, isAssistantMessage as r, isToolResultMessage as s, type MessageMetadata as t, type MessageOptions as u, createTurn as v, emptyUsage as w, aggregateUsage as x, type EventDelta as y, StreamEventType as z };
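
The hunk above introduces the package's new embedding surface: EmbeddingOptions, EmbedOptions, the result and progress types, and the EmbeddingInstance returned by the embedding() function. A minimal usage sketch follows, pieced together from the JSDoc examples in these declarations; the import paths and the embedding()/openai() entry points are assumptions, since only the type declarations appear in this diff:

```typescript
// Sketch only: import paths and factory entry points are assumed,
// not confirmed by this diff.
import { embedding } from '@providerprotocol/ai';
import { openai } from '@providerprotocol/ai/openai';

const embedder = embedding({ model: openai('text-embedding-3-large') });

// Plain call: resolves to an EmbeddingResult with vectors in input order.
const result = await embedder.embed(['doc1', 'doc2', 'doc3']);
console.log(result.embeddings[0].dimensions, result.usage);

// Chunked mode: returns an EmbeddingStream instead of a Promise.
const documents = Array.from({ length: 1000 }, (_, i) => `document ${i}`);
const stream = embedder.embed(documents, { chunked: true, batchSize: 100 });
for await (const progress of stream) {
  console.log(`${progress.completed}/${progress.total} (${progress.percent}%)`);
}
const full = await stream.result; // aggregate EmbeddingResult after iteration
```

Note the overload design: passing { chunked: true } statically switches the return type from Promise<EmbeddingResult> to EmbeddingStream, so consumers cannot forget to iterate.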
@@ -1,4 +1,4 @@
-import { a as ProviderConfig, g as Provider } from '../provider-x4RocsnK.js';
+import { a as ProviderConfig, e as Provider } from '../provider-6-mJYOOl.js';
 
 /**
  * Provider-specific parameters for Google Gemini API requests.
@@ -42,6 +42,17 @@ interface GoogleLLMParams {
     responseMimeType?: 'text/plain' | 'application/json';
     /** Response schema for structured output */
     responseSchema?: Record<string, unknown>;
+    /**
+     * Modalities to generate in the response.
+     *
+     * Use `['IMAGE']` or `['TEXT', 'IMAGE']` with Gemini image generation models
+     * (e.g., gemini-2.5-flash-image aka Nano Banana).
+     */
+    responseModalities?: GoogleResponseModality[];
+    /**
+     * Image generation configuration for Gemini image response modalities.
+     */
+    imageConfig?: GoogleImageConfig;
     /**
      * Presence penalty for new topics
      * Positive values encourage discussing new topics
@@ -124,6 +135,28 @@ interface GoogleLLMParams {
      */
     toolConfig?: GoogleToolConfig;
 }
+/**
+ * Output modality enum values for Gemini responseModalities.
+ *
+ * The API supports TEXT, IMAGE, and AUDIO response types. Some SDK examples
+ * use Title Case values, so both are accepted here.
+ */
+type GoogleResponseModality = 'TEXT' | 'IMAGE' | 'AUDIO' | 'Text' | 'Image' | 'Audio';
+/**
+ * Image generation configuration for Gemini response modalities.
+ */
+interface GoogleImageConfig {
+    /**
+     * Preferred aspect ratio for generated images.
+     * Example: "1:1", "9:16", "16:9".
+     */
+    aspectRatio?: string;
+    /**
+     * Preferred output size for generated images.
+     * Example: "1024x1024".
+     */
+    imageSize?: string;
+}
 /**
  * Configuration for extended thinking/reasoning in Gemini 2.5+ and 3+ models.
  *
@@ -980,4 +1013,4 @@ declare const google: Provider<unknown> & {
     };
 };
 
-export { type CacheCreateOptions, type CacheListOptions, type GoogleBuiltInTool, type GoogleCacheCreateRequest, type GoogleCacheListResponse, type GoogleCacheResponse, type GoogleCacheUpdateRequest, type GoogleCodeExecutionResult, type GoogleCodeExecutionTool, type GoogleEmbedParams, type GoogleFileSearchTool, type GoogleGroundingMetadata, type GoogleHeaders, type GoogleLLMParams, type GoogleMapsTool, type GoogleSearchTool, type GoogleTaskType, type GoogleToolConfig, type GoogleUrlContextTool, cache, google, tools };
+export { type CacheCreateOptions, type CacheListOptions, type GoogleBuiltInTool, type GoogleCacheCreateRequest, type GoogleCacheListResponse, type GoogleCacheResponse, type GoogleCacheUpdateRequest, type GoogleCodeExecutionResult, type GoogleCodeExecutionTool, type GoogleEmbedParams, type GoogleFileSearchTool, type GoogleGroundingMetadata, type GoogleHeaders, type GoogleImageConfig, type GoogleLLMParams, type GoogleMapsTool, type GoogleResponseModality, type GoogleSearchTool, type GoogleTaskType, type GoogleToolConfig, type GoogleUrlContextTool, cache, google, tools };
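
The Google typing changes are purely additive: GoogleLLMParams gains responseModalities and imageConfig, and the new GoogleResponseModality and GoogleImageConfig types join the module's exports. A hedged sketch of the new params; the field shapes come from the declarations above, while the import path and how a params object is attached to a request are assumptions not shown in this diff:

```typescript
import { google, type GoogleLLMParams } from '@providerprotocol/ai/google';

// Request image output alongside text from an image-capable Gemini model.
const params: GoogleLLMParams = {
  responseModalities: ['TEXT', 'IMAGE'], // 'IMAGE' alone is also accepted
  imageConfig: {
    aspectRatio: '16:9',   // preferred aspect ratio, per GoogleImageConfig
    imageSize: '1024x1024' // preferred output size
  }
};

// The provider factory call mirrors the embedding example's openai(...) usage;
// whether google() is invoked exactly this way is an assumption.
const model = google('gemini-2.5-flash-image');
```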
@@ -281,6 +281,16 @@ function transformResponse(data) {
         args: fc.functionCall.args,
         thoughtSignature: fc.thoughtSignature
       });
+    } else if ("inlineData" in part) {
+      const imagePart = part;
+      const dataString = imagePart.inlineData.data;
+      if (dataString) {
+        content.push({
+          type: "image",
+          mimeType: imagePart.inlineData.mimeType ?? "image/png",
+          source: { type: "base64", data: dataString }
+        });
+      }
     } else if ("codeExecutionResult" in part) {
       const codeResult = part;
       if (codeResult.codeExecutionResult.output) {
@@ -326,6 +336,7 @@ function createStreamState() {
     reasoning: "",
     thoughtSignature: void 0,
     toolCalls: [],
+    images: [],
     finishReason: null,
     inputTokens: 0,
     outputTokens: 0,
@@ -387,6 +398,20 @@ function transformStreamChunk(chunk, state) {
           argumentsJson: JSON.stringify(fc.functionCall.args)
         }
       });
+    } else if ("inlineData" in part) {
+      const imagePart = part;
+      const dataString = imagePart.inlineData.data;
+      if (dataString) {
+        state.images.push({
+          data: dataString,
+          mimeType: imagePart.inlineData.mimeType ?? "image/png"
+        });
+        events.push({
+          type: StreamEventType.ImageDelta,
+          index: state.images.length - 1,
+          delta: { data: decodeBase64(dataString) }
+        });
+      }
     } else if ("codeExecutionResult" in part) {
       const codeResult = part;
       if (codeResult.codeExecutionResult.output) {
@@ -424,6 +449,13 @@ function buildResponseFromState(state) {
     } catch {
     }
   }
+  for (const imageData of state.images) {
+    content.push({
+      type: "image",
+      mimeType: imageData.mimeType,
+      source: { type: "base64", data: imageData.data }
+    });
+  }
   for (const tc of state.toolCalls) {
     const toolCallId = tc.id || createGoogleToolCallId(tc.name, toolCalls.length);
     toolCalls.push({
@@ -479,6 +511,14 @@ function normalizeStopReason(reason) {
       return "end_turn";
   }
 }
+function decodeBase64(base64) {
+  const binaryString = atob(base64);
+  const bytes = new Uint8Array(binaryString.length);
+  for (let i = 0; i < binaryString.length; i += 1) {
+    bytes[i] = binaryString.charCodeAt(i);
+  }
+  return bytes;
+}
 
 // src/providers/google/llm.ts
 var GOOGLE_API_BASE = "https://generativelanguage.googleapis.com/v1beta";
@@ -488,7 +528,8 @@ var GOOGLE_CAPABILITIES = {
   structuredOutput: true,
   imageInput: true,
   videoInput: true,
-  audioInput: true
+  audioInput: true,
+  imageOutput: true
 };
 function buildUrl(modelId, action) {
   return `${GOOGLE_API_BASE}/models/${modelId}:${action}`;
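
At runtime, the Google transform now maps inlineData parts onto image content blocks, the provider advertises imageOutput, and streaming emits ImageDelta events whose delta carries bytes decoded by the new decodeBase64 helper. A consumer sketch follows; the AsyncIterable stream shape and the exact StreamEvent typing are assumptions, while the ImageDelta payload (a Uint8Array per event, with index identifying the image) matches transformStreamChunk above:

```typescript
import { StreamEventType } from '@providerprotocol/ai';

// Collect raw image bytes from a streamed Gemini response. Events with the
// same index belong to the same image; this sketch assumes a single image.
async function collectImageBytes(
  events: AsyncIterable<{ type: string; index?: number; delta?: { data?: Uint8Array } }>
): Promise<Uint8Array> {
  const chunks: Uint8Array[] = [];
  let total = 0;
  for await (const event of events) {
    if (event.type === StreamEventType.ImageDelta && event.delta?.data) {
      chunks.push(event.delta.data); // decodeBase64() output per chunk
      total += event.delta.data.length;
    }
  }
  // Concatenate the chunks into one buffer.
  const out = new Uint8Array(total);
  let offset = 0;
  for (const c of chunks) {
    out.set(c, offset);
    offset += c.length;
  }
  return out;
}
```

Non-streaming responses carry the same images as base64 content blocks of the form { type: 'image', mimeType, source: { type: 'base64', data } }, as transformResponse and buildResponseFromState show; only the streaming path decodes to bytes eagerly.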