@agentfield/sdk 0.1.89 → 0.1.90-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1344,6 +1344,32 @@ declare class Audio {
1344
1344
  */
1345
1345
  static fromBase64(base64Data: string, format?: 'wav' | 'mp3' | 'flac' | 'ogg'): Promise<Audio>;
1346
1346
  }
1347
+ /**
1348
+ * Represents video content in a multimodal prompt.
1349
+ */
1350
+ declare class Video {
1351
+ readonly type: 'video_url';
1352
+ readonly videoUrl: {
1353
+ url: string;
1354
+ };
1355
+ private constructor();
1356
+ /**
1357
+ * Create Video from a local file by converting to a base64 data URL.
1358
+ */
1359
+ static fromFile(filePath: string): Promise<Video>;
1360
+ /**
1361
+ * Create Video from a URL.
1362
+ */
1363
+ static fromUrl(url: string): Video;
1364
+ /**
1365
+ * Create Video from a buffer.
1366
+ */
1367
+ static fromBuffer(buffer: Buffer | Uint8Array, mimeType?: string): Promise<Video>;
1368
+ /**
1369
+ * Create Video from a base64 string.
1370
+ */
1371
+ static fromBase64(base64Data: string, mimeType?: string): Promise<Video>;
1372
+ }
1347
1373
  /**
1348
1374
  * Represents a generic file content in a multimodal prompt.
1349
1375
  */
@@ -1407,6 +1433,22 @@ declare function audioFromBuffer(buffer: Buffer | Uint8Array, format?: 'wav' | '
1407
1433
  * Create audio content from a base64 string.
1408
1434
  */
1409
1435
  declare function audioFromBase64(base64Data: string, format?: 'wav' | 'mp3' | 'flac' | 'ogg'): Promise<Audio>;
1436
+ /**
1437
+ * Create video content from a local file.
1438
+ */
1439
+ declare function videoFromFile(filePath: string): Promise<Video>;
1440
+ /**
1441
+ * Create video content from a URL.
1442
+ */
1443
+ declare function videoFromUrl(url: string): Video;
1444
+ /**
1445
+ * Create video content from a buffer.
1446
+ */
1447
+ declare function videoFromBuffer(buffer: Buffer | Uint8Array, mimeType?: string): Promise<Video>;
1448
+ /**
1449
+ * Create video content from a base64 string.
1450
+ */
1451
+ declare function videoFromBase64(base64Data: string, mimeType?: string): Promise<Video>;
1410
1452
  /**
1411
1453
  * Create file content from a local file.
1412
1454
  */
@@ -1423,7 +1465,7 @@ declare function fileFromBuffer(buffer: Buffer | Uint8Array, mimeType: string):
1423
1465
  * Create file content from a base64 string.
1424
1466
  */
1425
1467
  declare function fileFromBase64(base64Data: string, mimeType: string): Promise<File>;
1426
- type MultimodalContent = Text | Image | Audio | File;
1468
+ type MultimodalContent = Text | Image | Audio | Video | File;
1427
1469
 
1428
1470
  /**
1429
1471
  * Multimodal response classes for handling LLM multimodal outputs.
@@ -1879,4 +1921,4 @@ declare class ApprovalClient {
1879
1921
  waitForApproval(executionId: string, opts?: WaitForApprovalOptions): Promise<ApprovalStatusResponse>;
1880
1922
  }
1881
1923
 
1882
- export { ACTIVE_STATUSES, AIClient, type AIConfig, type AIEmbeddingOptions, type AIRequestOptions, type AIStream, type AIToolRequestOptions, Agent, type AgentCapability, type AgentConfig, type AgentHandler, AgentRouter, type AgentRouterOptions, type AgentState, ApprovalClient, type ApprovalRequestResponse, type ApprovalStatusResponse, Audio, type AudioOutput, type AudioRequest, type AuditTrailExport, type AuditTrailFilters, type Awaitable, CANONICAL_STATUSES, type CompactCapability, type CompactDiscoveryResponse, DIDAuthenticator, type DIDIdentity, type DIDIdentityPackage, type DIDRegistrationRequest, type DIDRegistrationResponse, type DeploymentType, DidClient, DidInterface, DidManager, type DiscoveryFormat, type DiscoveryOptions, type DiscoveryPagination, type DiscoveryResponse, type DiscoveryResult, ExecutionContext, type ExecutionCredential, type ExecutionLogAttributes, type ExecutionLogBatchPayload, type ExecutionLogContext, type ExecutionLogEmitOptions, type ExecutionLogEntry, type ExecutionLogLevel, type ExecutionLogTransport, type ExecutionLogTransportPayload, type ExecutionLogWireEntry, ExecutionLogger, type ExecutionLoggerOptions, type ExecutionMetadata, ExecutionStatus, type ExecutionStatusValue, File, type FileOutput, type GenerateCredentialOptions, type GenerateCredentialParams, HEADER_CALLER_DID, HEADER_DID_NONCE, HEADER_DID_SIGNATURE, HEADER_DID_TIMESTAMP, type HarnessConfig, type HarnessOptions, type HarnessProvider, type HarnessResult, HarnessRunner, type HealthStatus, Image, type ImageOutput, type ImageRequest, type MediaProvider, MediaProviderError, type MediaResponse, MediaRouter, type MemoryChangeEvent, MemoryClient, MemoryClientBase, type MemoryConfig, MemoryEventClient, type MemoryEventHandler, type MemoryEventHistoryOptions, MemoryInterface, type MemoryRequestMetadata, type MemoryRequestOptions, type MemoryScope, type MemoryWatchHandler, type Metrics, type MultimodalContent, MultimodalResponse, OpenRouterMediaProvider, type OpenRouterMediaProviderOptions, RateLimitError, type RateLimiterOptions, type RawExecutionContext, type RawResult, type ReasonerCapability, ReasonerContext, type ReasonerDefinition, type ReasonerHandler, type ReasonerOptions, type RequestApprovalPayload, SUPPORTED_PROVIDERS, type ServerlessAdapter, type ServerlessEvent, type ServerlessResponse, type SkillCapability, SkillContext, type SkillDefinition, type SkillHandler, type SkillOptions, StatelessRateLimiter, TERMINAL_STATUSES, Text, type ToolCallConfig, type ToolCallRecord, type ToolCallTrace, type ToolsOption, type VectorSearchOptions, type VectorSearchResult, type VideoFrameImage, type VideoInputReference, type VideoRequest, type WaitForApprovalOptions, type WorkflowCredential, type WorkflowMetadata, type WorkflowProgressOptions, WorkflowReporter, type ZodSchema, audioFromBase64, audioFromBuffer, audioFromFile, audioFromUrl, buildProvider, buildToolConfig, capabilitiesToTools, capabilityToMetadataTool, capabilityToTool, createExecutionLogger, createHarnessResult, createMetrics, createMultimodalResponse, createRawResult, executeToolCallLoop, fileFromBase64, fileFromBuffer, fileFromPath, fileFromUrl, getCurrentContext, getCurrentSkillContext, imageFromBase64, imageFromBuffer, imageFromFile, imageFromUrl, isActive, isExecutionLogBatchPayload, isTerminal, normalizeExecutionLogEntry, normalizeStatus, serializeExecutionLogEntry, text };
1924
+ export { ACTIVE_STATUSES, AIClient, type AIConfig, type AIEmbeddingOptions, type AIRequestOptions, type AIStream, type AIToolRequestOptions, Agent, type AgentCapability, type AgentConfig, type AgentHandler, AgentRouter, type AgentRouterOptions, type AgentState, ApprovalClient, type ApprovalRequestResponse, type ApprovalStatusResponse, Audio, type AudioOutput, type AudioRequest, type AuditTrailExport, type AuditTrailFilters, type Awaitable, CANONICAL_STATUSES, type CompactCapability, type CompactDiscoveryResponse, DIDAuthenticator, type DIDIdentity, type DIDIdentityPackage, type DIDRegistrationRequest, type DIDRegistrationResponse, type DeploymentType, DidClient, DidInterface, DidManager, type DiscoveryFormat, type DiscoveryOptions, type DiscoveryPagination, type DiscoveryResponse, type DiscoveryResult, ExecutionContext, type ExecutionCredential, type ExecutionLogAttributes, type ExecutionLogBatchPayload, type ExecutionLogContext, type ExecutionLogEmitOptions, type ExecutionLogEntry, type ExecutionLogLevel, type ExecutionLogTransport, type ExecutionLogTransportPayload, type ExecutionLogWireEntry, ExecutionLogger, type ExecutionLoggerOptions, type ExecutionMetadata, ExecutionStatus, type ExecutionStatusValue, File, type FileOutput, type GenerateCredentialOptions, type GenerateCredentialParams, HEADER_CALLER_DID, HEADER_DID_NONCE, HEADER_DID_SIGNATURE, HEADER_DID_TIMESTAMP, type HarnessConfig, type HarnessOptions, type HarnessProvider, type HarnessResult, HarnessRunner, type HealthStatus, Image, type ImageOutput, type ImageRequest, type MediaProvider, MediaProviderError, type MediaResponse, MediaRouter, type MemoryChangeEvent, MemoryClient, MemoryClientBase, type MemoryConfig, MemoryEventClient, type MemoryEventHandler, type MemoryEventHistoryOptions, MemoryInterface, type MemoryRequestMetadata, type MemoryRequestOptions, type MemoryScope, type MemoryWatchHandler, type Metrics, type MultimodalContent, MultimodalResponse, OpenRouterMediaProvider, type OpenRouterMediaProviderOptions, RateLimitError, type RateLimiterOptions, type RawExecutionContext, type RawResult, type ReasonerCapability, ReasonerContext, type ReasonerDefinition, type ReasonerHandler, type ReasonerOptions, type RequestApprovalPayload, SUPPORTED_PROVIDERS, type ServerlessAdapter, type ServerlessEvent, type ServerlessResponse, type SkillCapability, SkillContext, type SkillDefinition, type SkillHandler, type SkillOptions, StatelessRateLimiter, TERMINAL_STATUSES, Text, type ToolCallConfig, type ToolCallRecord, type ToolCallTrace, type ToolsOption, type VectorSearchOptions, type VectorSearchResult, Video, type VideoFrameImage, type VideoInputReference, type VideoRequest, type WaitForApprovalOptions, type WorkflowCredential, type WorkflowMetadata, type WorkflowProgressOptions, WorkflowReporter, type ZodSchema, audioFromBase64, audioFromBuffer, audioFromFile, audioFromUrl, buildProvider, buildToolConfig, capabilitiesToTools, capabilityToMetadataTool, capabilityToTool, createExecutionLogger, createHarnessResult, createMetrics, createMultimodalResponse, createRawResult, executeToolCallLoop, fileFromBase64, fileFromBuffer, fileFromPath, fileFromUrl, getCurrentContext, getCurrentSkillContext, imageFromBase64, imageFromBuffer, imageFromFile, imageFromUrl, isActive, isExecutionLogBatchPayload, isTerminal, normalizeExecutionLogEntry, normalizeStatus, serializeExecutionLogEntry, text, videoFromBase64, videoFromBuffer, videoFromFile, videoFromUrl };
package/dist/index.js CHANGED
@@ -4982,6 +4982,13 @@ var AUDIO_MIME_TYPES = {
4982
4982
  ".flac": "audio/flac",
4983
4983
  ".ogg": "audio/ogg"
4984
4984
  };
4985
+ var VIDEO_MIME_TYPES = {
4986
+ ".mp4": "video/mp4",
4987
+ ".mpeg": "video/mpeg",
4988
+ ".mpg": "video/mpeg",
4989
+ ".mov": "video/quicktime",
4990
+ ".webm": "video/webm"
4991
+ };
4985
4992
  var Text = class {
4986
4993
  type = "text";
4987
4994
  text;
@@ -5079,6 +5086,43 @@ var Audio = class _Audio {
5079
5086
  return new _Audio({ data: base64Data, format });
5080
5087
  }
5081
5088
  };
5089
+ var Video = class _Video {
5090
+ type = "video_url";
5091
+ videoUrl;
5092
+ constructor(videoUrl) {
5093
+ this.videoUrl = videoUrl;
5094
+ }
5095
+ /**
5096
+ * Create Video from a local file by converting to a base64 data URL.
5097
+ */
5098
+ static async fromFile(filePath) {
5099
+ const absolutePath = resolve(filePath);
5100
+ const buffer = await readFile(absolutePath);
5101
+ const base64Data = buffer.toString("base64");
5102
+ const ext = getExtension(absolutePath).toLowerCase();
5103
+ const mimeType = VIDEO_MIME_TYPES[ext] || "video/mp4";
5104
+ return new _Video({ url: `data:${mimeType};base64,${base64Data}` });
5105
+ }
5106
+ /**
5107
+ * Create Video from a URL.
5108
+ */
5109
+ static fromUrl(url) {
5110
+ return new _Video({ url });
5111
+ }
5112
+ /**
5113
+ * Create Video from a buffer.
5114
+ */
5115
+ static async fromBuffer(buffer, mimeType = "video/mp4") {
5116
+ const base64Data = Buffer.from(buffer).toString("base64");
5117
+ return new _Video({ url: `data:${mimeType};base64,${base64Data}` });
5118
+ }
5119
+ /**
5120
+ * Create Video from a base64 string.
5121
+ */
5122
+ static async fromBase64(base64Data, mimeType = "video/mp4") {
5123
+ return new _Video({ url: `data:${mimeType};base64,${base64Data}` });
5124
+ }
5125
+ };
5082
5126
  var File = class _File {
5083
5127
  type = "file";
5084
5128
  file;
@@ -5133,6 +5177,9 @@ function guessMimeType(filePath) {
5133
5177
  if (ext in AUDIO_MIME_TYPES) {
5134
5178
  return AUDIO_MIME_TYPES[ext];
5135
5179
  }
5180
+ if (ext in VIDEO_MIME_TYPES) {
5181
+ return VIDEO_MIME_TYPES[ext];
5182
+ }
5136
5183
  const documentMimeTypes = {
5137
5184
  ".pdf": "application/pdf",
5138
5185
  ".doc": "application/msword",
@@ -5175,6 +5222,18 @@ async function audioFromBuffer(buffer, format = "wav") {
5175
5222
  async function audioFromBase64(base64Data, format = "wav") {
5176
5223
  return Audio.fromBase64(base64Data, format);
5177
5224
  }
5225
+ async function videoFromFile(filePath) {
5226
+ return Video.fromFile(filePath);
5227
+ }
5228
+ function videoFromUrl(url) {
5229
+ return Video.fromUrl(url);
5230
+ }
5231
+ async function videoFromBuffer(buffer, mimeType = "video/mp4") {
5232
+ return Video.fromBuffer(buffer, mimeType);
5233
+ }
5234
+ async function videoFromBase64(base64Data, mimeType = "video/mp4") {
5235
+ return Video.fromBase64(base64Data, mimeType);
5236
+ }
5178
5237
  async function fileFromPath(filePath, mimeType) {
5179
5238
  return File.fromFile(filePath, mimeType);
5180
5239
  }
@@ -5591,6 +5650,8 @@ var DEFAULT_TIMEOUT = 6e5;
5591
5650
  var API_TIMEOUT = 3e4;
5592
5651
  var DOWNLOAD_TIMEOUT = 12e4;
5593
5652
  var MAX_CONSECUTIVE_PARSE_ERRORS = 50;
5653
+ var DEFAULT_IMAGE_MODEL = "google/gemini-3.1-flash-image-preview";
5654
+ var DEFAULT_TTS_MODEL = "hexgrad/kokoro-82m";
5594
5655
  var apiKeyStore = /* @__PURE__ */ new WeakMap();
5595
5656
  var modelMetaStore = /* @__PURE__ */ new WeakMap();
5596
5657
  function emptyMediaResponse(raw) {
@@ -5599,6 +5660,9 @@ function emptyMediaResponse(raw) {
5599
5660
  function stripPrefix(model) {
5600
5661
  return model.startsWith("openrouter/") ? model.slice("openrouter/".length) : model;
5601
5662
  }
5663
+ function defaultVoiceForModel(model) {
5664
+ return stripPrefix(model) === "hexgrad/kokoro-82m" ? "af_alloy" : "alloy";
5665
+ }
5602
5666
  function wrapPcm16AsWav(pcm, sampleRate = 24e3) {
5603
5667
  const channels = 1;
5604
5668
  const bitsPerSample = 16;
@@ -5857,7 +5921,7 @@ var OpenRouterMediaProvider = class {
5857
5921
  }
5858
5922
  // ── Image ──────────────────────────────────────────────────────────
5859
5923
  async generateImage(request) {
5860
- const model = stripPrefix(request.model ?? "openai/gpt-image-1");
5924
+ const model = stripPrefix(request.model ?? DEFAULT_IMAGE_MODEL);
5861
5925
  let userContent = request.prompt;
5862
5926
  if (request.imageUrls && request.imageUrls.length > 0) {
5863
5927
  userContent = [
@@ -5946,7 +6010,8 @@ var OpenRouterMediaProvider = class {
5946
6010
  }
5947
6011
  // ── Audio ──────────────────────────────────────────────────────────
5948
6012
  async generateAudio(request) {
5949
- const model = stripPrefix(request.model ?? "openai/gpt-4o-mini-tts");
6013
+ const model = stripPrefix(request.model ?? DEFAULT_TTS_MODEL);
6014
+ const voice = request.voice ?? defaultVoiceForModel(model);
5950
6015
  const requestedFormat = request.format ?? "wav";
5951
6016
  const meta = await this.fetchModelMeta(model);
5952
6017
  const outMods = meta.outputModalities;
@@ -5955,7 +6020,7 @@ var OpenRouterMediaProvider = class {
5955
6020
  return this.generateAudioViaSpeechEndpoint(
5956
6021
  model,
5957
6022
  request.text,
5958
- request.voice ?? "alloy",
6023
+ voice,
5959
6024
  requestedFormat,
5960
6025
  request
5961
6026
  );
@@ -5968,7 +6033,7 @@ var OpenRouterMediaProvider = class {
5968
6033
  modalities: ["text", "audio"],
5969
6034
  stream: true,
5970
6035
  audio: {
5971
- voice: request.voice ?? "alloy",
6036
+ voice,
5972
6037
  format: wireFormat
5973
6038
  }
5974
6039
  };
@@ -6313,6 +6378,6 @@ function sleep2(ms) {
6313
6378
  return new Promise((resolve2) => setTimeout(resolve2, ms));
6314
6379
  }
6315
6380
 
6316
- export { ACTIVE_STATUSES, AIClient, Agent, AgentRouter, ApprovalClient, Audio, CANONICAL_STATUSES, DIDAuthenticator, DidClient, DidInterface, DidManager, ExecutionContext, ExecutionLogger, ExecutionStatus, File, HEADER_CALLER_DID, HEADER_DID_NONCE, HEADER_DID_SIGNATURE, HEADER_DID_TIMESTAMP, HarnessRunner, Image, MediaProviderError, MediaRouter, MemoryClient, MemoryClientBase, MemoryEventClient, MemoryInterface, MultimodalResponse, OpenRouterMediaProvider, RateLimitError, ReasonerContext, SUPPORTED_PROVIDERS, SkillContext, StatelessRateLimiter, TERMINAL_STATUSES, Text, WorkflowReporter, audioFromBase64, audioFromBuffer, audioFromFile, audioFromUrl, buildProvider, buildToolConfig, capabilitiesToTools, capabilityToMetadataTool, capabilityToTool, createExecutionLogger, createHarnessResult, createMetrics, createMultimodalResponse, createRawResult, executeToolCallLoop, fileFromBase64, fileFromBuffer, fileFromPath, fileFromUrl, getCurrentContext, getCurrentSkillContext, imageFromBase64, imageFromBuffer, imageFromFile, imageFromUrl, isActive, isExecutionLogBatchPayload, isTerminal, normalizeExecutionLogEntry, normalizeStatus, serializeExecutionLogEntry, text };
6381
+ export { ACTIVE_STATUSES, AIClient, Agent, AgentRouter, ApprovalClient, Audio, CANONICAL_STATUSES, DIDAuthenticator, DidClient, DidInterface, DidManager, ExecutionContext, ExecutionLogger, ExecutionStatus, File, HEADER_CALLER_DID, HEADER_DID_NONCE, HEADER_DID_SIGNATURE, HEADER_DID_TIMESTAMP, HarnessRunner, Image, MediaProviderError, MediaRouter, MemoryClient, MemoryClientBase, MemoryEventClient, MemoryInterface, MultimodalResponse, OpenRouterMediaProvider, RateLimitError, ReasonerContext, SUPPORTED_PROVIDERS, SkillContext, StatelessRateLimiter, TERMINAL_STATUSES, Text, Video, WorkflowReporter, audioFromBase64, audioFromBuffer, audioFromFile, audioFromUrl, buildProvider, buildToolConfig, capabilitiesToTools, capabilityToMetadataTool, capabilityToTool, createExecutionLogger, createHarnessResult, createMetrics, createMultimodalResponse, createRawResult, executeToolCallLoop, fileFromBase64, fileFromBuffer, fileFromPath, fileFromUrl, getCurrentContext, getCurrentSkillContext, imageFromBase64, imageFromBuffer, imageFromFile, imageFromUrl, isActive, isExecutionLogBatchPayload, isTerminal, normalizeExecutionLogEntry, normalizeStatus, serializeExecutionLogEntry, text, videoFromBase64, videoFromBuffer, videoFromFile, videoFromUrl };
6317
6382
  //# sourceMappingURL=index.js.map
6318
6383
  //# sourceMappingURL=index.js.map