@agentfield/sdk 0.1.85-rc.4 → 0.1.85-rc.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +66 -14
- package/dist/index.js +237 -16
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1606,27 +1606,42 @@ declare class MediaProviderError extends Error {
|
|
|
1606
1606
|
cause?: unknown;
|
|
1607
1607
|
});
|
|
1608
1608
|
}
|
|
1609
|
+
/**
 * Frame guidance for image-to-video models (e.g. Veo).
 *
 * Each entry pins one image to a specific frame of the generated clip.
 */
interface VideoFrameImage {
    /**
     * Image content type — usually "image_url".
     * OpenRouterMediaProvider sends "image_url" when this is omitted.
     */
    type?: string;
    /** Where the image lives: an image URL or a `data:` URL. */
    imageUrl: { url: string };
    /** Which frame of the video this image controls. */
    frameType?: 'first_frame' | 'last_frame';
}
|
|
1620
|
+
/**
 * Reference image for style / subject guidance (Veo "reference-to-video").
 */
interface VideoInputReference {
    /**
     * Content type of the reference entry.
     * OpenRouterMediaProvider sends "image_url" when this is omitted.
     */
    type?: string;
    /** Location of the reference image. */
    imageUrl: { url: string };
}
|
|
1609
1627
|
interface VideoRequest {
|
|
1610
1628
|
prompt: string;
|
|
1611
1629
|
model?: string;
|
|
1630
|
+
/** Duration in seconds (model-dependent — typically 4, 6, or 8). */
|
|
1612
1631
|
duration?: number;
|
|
1613
1632
|
resolution?: '480p' | '720p' | '1080p' | '1K' | '2K' | '4K';
|
|
1614
1633
|
aspectRatio?: '16:9' | '9:16' | '1:1' | '4:3' | '3:4' | '21:9' | '9:21';
|
|
1634
|
+
/** Toggle synchronized audio track (when model supports it). */
|
|
1615
1635
|
generateAudio?: boolean;
|
|
1616
1636
|
seed?: number;
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
type: string;
|
|
1626
|
-
imageUrl: {
|
|
1627
|
-
url: string;
|
|
1628
|
-
};
|
|
1629
|
-
}>;
|
|
1637
|
+
/** Single input image for image-to-video (legacy convenience field). */
|
|
1638
|
+
imageUrl?: string;
|
|
1639
|
+
/** Per-frame guidance — first_frame / last_frame. Takes precedence over `imageUrl`. */
|
|
1640
|
+
frameImages?: VideoFrameImage[];
|
|
1641
|
+
/** Reference images for style/subject guidance. */
|
|
1642
|
+
inputReferences?: VideoInputReference[];
|
|
1643
|
+
/** Model-specific passthrough parameters (e.g. Veo's `personGeneration`). */
|
|
1644
|
+
extra?: Record<string, unknown>;
|
|
1630
1645
|
pollInterval?: number;
|
|
1631
1646
|
timeout?: number;
|
|
1632
1647
|
}
|
|
@@ -1635,21 +1650,37 @@ interface ImageRequest {
|
|
|
1635
1650
|
model?: string;
|
|
1636
1651
|
size?: string;
|
|
1637
1652
|
quality?: string;
|
|
1653
|
+
/** Reference / source image(s) for image+text→image models (e.g. grok-imagine). */
|
|
1654
|
+
imageUrls?: string[];
|
|
1638
1655
|
imageConfig?: {
|
|
1639
1656
|
aspectRatio?: string;
|
|
1640
1657
|
imageSize?: string;
|
|
1658
|
+
/** Image-to-image blend strength (model-dependent, 0–1). */
|
|
1659
|
+
strength?: number;
|
|
1660
|
+
/** Style hint — Recraft V3 etc. */
|
|
1661
|
+
style?: string;
|
|
1662
|
+
/** RGB color palette — array of [r,g,b]. */
|
|
1663
|
+
rgbColors?: number[][];
|
|
1664
|
+
/** Background color hint as [r,g,b]. */
|
|
1665
|
+
backgroundRgbColor?: number[];
|
|
1641
1666
|
superResolutionReferences?: string[];
|
|
1642
1667
|
fontInputs?: Array<{
|
|
1643
1668
|
fontUrl: string;
|
|
1644
1669
|
text: string;
|
|
1645
1670
|
}>;
|
|
1646
1671
|
};
|
|
1672
|
+
/** Model-specific passthrough parameters. */
|
|
1673
|
+
extra?: Record<string, unknown>;
|
|
1647
1674
|
}
|
|
1648
1675
|
/**
 * Parameters for a text-to-speech / audio generation request.
 */
interface AudioRequest {
    /** Text handed to the model as the speech input. */
    text: string;
    /** Model id. OpenRouterMediaProvider falls back to "openai/gpt-4o-mini-tts". */
    model?: string;
    /** Voice name. OpenRouterMediaProvider falls back to "alloy". */
    voice?: string;
    /** Requested output format. OpenRouterMediaProvider falls back to "wav". */
    format?: string;
    /** Playback speed multiplier (OpenAI TTS only — other models ignore). */
    speed?: number;
    /** Model-specific passthrough parameters. */
    extra?: Record<string, unknown>;
}
|
|
1654
1685
|
interface MediaResponse {
|
|
1655
1686
|
text: string;
|
|
@@ -1713,11 +1744,32 @@ declare class OpenRouterMediaProvider implements MediaProvider {
|
|
|
1713
1744
|
readonly supportedModalities: string[];
|
|
1714
1745
|
private readonly baseUrl;
|
|
1715
1746
|
constructor(options?: OpenRouterMediaProviderOptions);
|
|
1747
|
+
/**
|
|
1748
|
+
* Seed the metadata cache for a model. Useful when running against test
|
|
1749
|
+
* servers that don't expose `GET /models/{id}/endpoints`, or when callers
|
|
1750
|
+
* already know the routing they want.
|
|
1751
|
+
*
|
|
1752
|
+
* Output modalities follow OpenRouter's convention — `["speech"]` for
|
|
1753
|
+
* TTS-only (Kokoro etc.), `["text","audio"]` for chat-audio (gpt-audio
|
|
1754
|
+
* family), `["video"]`, `["image"]`, etc.
|
|
1755
|
+
*/
|
|
1756
|
+
seedModelMeta(model: string, outputModalities: string[], inputModalities?: string[]): void;
|
|
1757
|
+
/**
|
|
1758
|
+
* Fetch + cache OpenRouter model metadata so we can route requests to the
|
|
1759
|
+
* right endpoint. On any error returns an empty meta object so callers can
|
|
1760
|
+
* fall back to defaults.
|
|
1761
|
+
*/
|
|
1762
|
+
private fetchModelMeta;
|
|
1716
1763
|
/** Prevent API key from leaking via JSON.stringify (CR-03). */
|
|
1717
1764
|
toJSON(): Record<string, unknown>;
|
|
1718
1765
|
generateVideo(request: VideoRequest): Promise<MediaResponse>;
|
|
1719
1766
|
generateImage(request: ImageRequest): Promise<MediaResponse>;
|
|
1720
1767
|
generateAudio(request: AudioRequest): Promise<MediaResponse>;
|
|
1768
|
+
/**
|
|
1769
|
+
* Call OpenRouter's OpenAI-compatible TTS endpoint (`POST /audio/speech`).
|
|
1770
|
+
* Returns raw bytes for the requested format; wraps PCM → WAV when needed.
|
|
1771
|
+
*/
|
|
1772
|
+
private generateAudioViaSpeechEndpoint;
|
|
1721
1773
|
private post;
|
|
1722
1774
|
private get;
|
|
1723
1775
|
}
|
|
@@ -1827,4 +1879,4 @@ declare class ApprovalClient {
|
|
|
1827
1879
|
waitForApproval(executionId: string, opts?: WaitForApprovalOptions): Promise<ApprovalStatusResponse>;
|
|
1828
1880
|
}
|
|
1829
1881
|
|
|
1830
|
-
export { ACTIVE_STATUSES, AIClient, type AIConfig, type AIEmbeddingOptions, type AIRequestOptions, type AIStream, type AIToolRequestOptions, Agent, type AgentCapability, type AgentConfig, type AgentHandler, AgentRouter, type AgentRouterOptions, type AgentState, ApprovalClient, type ApprovalRequestResponse, type ApprovalStatusResponse, Audio, type AudioOutput, type AudioRequest, type AuditTrailExport, type AuditTrailFilters, type Awaitable, CANONICAL_STATUSES, type CompactCapability, type CompactDiscoveryResponse, DIDAuthenticator, type DIDIdentity, type DIDIdentityPackage, type DIDRegistrationRequest, type DIDRegistrationResponse, type DeploymentType, DidClient, DidInterface, DidManager, type DiscoveryFormat, type DiscoveryOptions, type DiscoveryPagination, type DiscoveryResponse, type DiscoveryResult, ExecutionContext, type ExecutionCredential, type ExecutionLogAttributes, type ExecutionLogBatchPayload, type ExecutionLogContext, type ExecutionLogEmitOptions, type ExecutionLogEntry, type ExecutionLogLevel, type ExecutionLogTransport, type ExecutionLogTransportPayload, type ExecutionLogWireEntry, ExecutionLogger, type ExecutionLoggerOptions, type ExecutionMetadata, ExecutionStatus, type ExecutionStatusValue, File, type FileOutput, type GenerateCredentialOptions, type GenerateCredentialParams, HEADER_CALLER_DID, HEADER_DID_NONCE, HEADER_DID_SIGNATURE, HEADER_DID_TIMESTAMP, type HarnessConfig, type HarnessOptions, type HarnessProvider, type HarnessResult, HarnessRunner, type HealthStatus, Image, type ImageOutput, type ImageRequest, type MediaProvider, MediaProviderError, type MediaResponse, MediaRouter, type MemoryChangeEvent, MemoryClient, MemoryClientBase, type MemoryConfig, MemoryEventClient, type MemoryEventHandler, type MemoryEventHistoryOptions, MemoryInterface, type MemoryRequestMetadata, type MemoryRequestOptions, type MemoryScope, type MemoryWatchHandler, type Metrics, type MultimodalContent, MultimodalResponse, OpenRouterMediaProvider, type 
OpenRouterMediaProviderOptions, RateLimitError, type RateLimiterOptions, type RawExecutionContext, type RawResult, type ReasonerCapability, ReasonerContext, type ReasonerDefinition, type ReasonerHandler, type ReasonerOptions, type RequestApprovalPayload, SUPPORTED_PROVIDERS, type ServerlessAdapter, type ServerlessEvent, type ServerlessResponse, type SkillCapability, SkillContext, type SkillDefinition, type SkillHandler, type SkillOptions, StatelessRateLimiter, TERMINAL_STATUSES, Text, type ToolCallConfig, type ToolCallRecord, type ToolCallTrace, type ToolsOption, type VectorSearchOptions, type VectorSearchResult, type VideoRequest, type WaitForApprovalOptions, type WorkflowCredential, type WorkflowMetadata, type WorkflowProgressOptions, WorkflowReporter, type ZodSchema, audioFromBase64, audioFromBuffer, audioFromFile, audioFromUrl, buildProvider, buildToolConfig, capabilitiesToTools, capabilityToMetadataTool, capabilityToTool, createExecutionLogger, createHarnessResult, createMetrics, createMultimodalResponse, createRawResult, executeToolCallLoop, fileFromBase64, fileFromBuffer, fileFromPath, fileFromUrl, getCurrentContext, getCurrentSkillContext, imageFromBase64, imageFromBuffer, imageFromFile, imageFromUrl, isActive, isExecutionLogBatchPayload, isTerminal, normalizeExecutionLogEntry, normalizeStatus, serializeExecutionLogEntry, text };
|
|
1882
|
+
export { ACTIVE_STATUSES, AIClient, type AIConfig, type AIEmbeddingOptions, type AIRequestOptions, type AIStream, type AIToolRequestOptions, Agent, type AgentCapability, type AgentConfig, type AgentHandler, AgentRouter, type AgentRouterOptions, type AgentState, ApprovalClient, type ApprovalRequestResponse, type ApprovalStatusResponse, Audio, type AudioOutput, type AudioRequest, type AuditTrailExport, type AuditTrailFilters, type Awaitable, CANONICAL_STATUSES, type CompactCapability, type CompactDiscoveryResponse, DIDAuthenticator, type DIDIdentity, type DIDIdentityPackage, type DIDRegistrationRequest, type DIDRegistrationResponse, type DeploymentType, DidClient, DidInterface, DidManager, type DiscoveryFormat, type DiscoveryOptions, type DiscoveryPagination, type DiscoveryResponse, type DiscoveryResult, ExecutionContext, type ExecutionCredential, type ExecutionLogAttributes, type ExecutionLogBatchPayload, type ExecutionLogContext, type ExecutionLogEmitOptions, type ExecutionLogEntry, type ExecutionLogLevel, type ExecutionLogTransport, type ExecutionLogTransportPayload, type ExecutionLogWireEntry, ExecutionLogger, type ExecutionLoggerOptions, type ExecutionMetadata, ExecutionStatus, type ExecutionStatusValue, File, type FileOutput, type GenerateCredentialOptions, type GenerateCredentialParams, HEADER_CALLER_DID, HEADER_DID_NONCE, HEADER_DID_SIGNATURE, HEADER_DID_TIMESTAMP, type HarnessConfig, type HarnessOptions, type HarnessProvider, type HarnessResult, HarnessRunner, type HealthStatus, Image, type ImageOutput, type ImageRequest, type MediaProvider, MediaProviderError, type MediaResponse, MediaRouter, type MemoryChangeEvent, MemoryClient, MemoryClientBase, type MemoryConfig, MemoryEventClient, type MemoryEventHandler, type MemoryEventHistoryOptions, MemoryInterface, type MemoryRequestMetadata, type MemoryRequestOptions, type MemoryScope, type MemoryWatchHandler, type Metrics, type MultimodalContent, MultimodalResponse, OpenRouterMediaProvider, type 
OpenRouterMediaProviderOptions, RateLimitError, type RateLimiterOptions, type RawExecutionContext, type RawResult, type ReasonerCapability, ReasonerContext, type ReasonerDefinition, type ReasonerHandler, type ReasonerOptions, type RequestApprovalPayload, SUPPORTED_PROVIDERS, type ServerlessAdapter, type ServerlessEvent, type ServerlessResponse, type SkillCapability, SkillContext, type SkillDefinition, type SkillHandler, type SkillOptions, StatelessRateLimiter, TERMINAL_STATUSES, Text, type ToolCallConfig, type ToolCallRecord, type ToolCallTrace, type ToolsOption, type VectorSearchOptions, type VectorSearchResult, type VideoFrameImage, type VideoInputReference, type VideoRequest, type WaitForApprovalOptions, type WorkflowCredential, type WorkflowMetadata, type WorkflowProgressOptions, WorkflowReporter, type ZodSchema, audioFromBase64, audioFromBuffer, audioFromFile, audioFromUrl, buildProvider, buildToolConfig, capabilitiesToTools, capabilityToMetadataTool, capabilityToTool, createExecutionLogger, createHarnessResult, createMetrics, createMultimodalResponse, createRawResult, executeToolCallLoop, fileFromBase64, fileFromBuffer, fileFromPath, fileFromUrl, getCurrentContext, getCurrentSkillContext, imageFromBase64, imageFromBuffer, imageFromFile, imageFromUrl, isActive, isExecutionLogBatchPayload, isTerminal, normalizeExecutionLogEntry, normalizeStatus, serializeExecutionLogEntry, text };
|
package/dist/index.js
CHANGED
|
@@ -5578,12 +5578,55 @@ var API_TIMEOUT = 3e4;
|
|
|
5578
5578
|
var DOWNLOAD_TIMEOUT = 12e4;
|
|
5579
5579
|
var MAX_CONSECUTIVE_PARSE_ERRORS = 50;
|
|
5580
5580
|
var apiKeyStore = /* @__PURE__ */ new WeakMap();
|
|
5581
|
+
var modelMetaStore = /* @__PURE__ */ new WeakMap();
|
|
5581
5582
|
/**
 * Build a blank media response whose only populated field is the raw
 * provider payload. Every call returns fresh arrays, so callers may push
 * into them without affecting other responses.
 */
function emptyMediaResponse(raw) {
  const blank = {
    text: "",
    images: [],
    audio: null,
    files: [],
    videos: [],
    rawResponse: raw
  };
  return blank;
}
|
|
5584
5585
|
/**
 * Drop a single leading "openrouter/" routing prefix from a model id.
 * Ids without the prefix are returned unchanged.
 */
function stripPrefix(model) {
  const routingPrefix = "openrouter/";
  if (!model.startsWith(routingPrefix)) {
    return model;
  }
  return model.slice(routingPrefix.length);
}
|
|
5588
|
+
function wrapPcm16AsWav(pcm, sampleRate = 24e3) {
|
|
5589
|
+
const channels = 1;
|
|
5590
|
+
const bitsPerSample = 16;
|
|
5591
|
+
const byteRate = sampleRate * channels * bitsPerSample / 8;
|
|
5592
|
+
const blockAlign = channels * bitsPerSample / 8;
|
|
5593
|
+
const dataSize = pcm.byteLength;
|
|
5594
|
+
const buffer = new ArrayBuffer(44 + dataSize);
|
|
5595
|
+
const view = new DataView(buffer);
|
|
5596
|
+
view.setUint8(0, 82);
|
|
5597
|
+
view.setUint8(1, 73);
|
|
5598
|
+
view.setUint8(2, 70);
|
|
5599
|
+
view.setUint8(3, 70);
|
|
5600
|
+
view.setUint32(4, 36 + dataSize, true);
|
|
5601
|
+
view.setUint8(8, 87);
|
|
5602
|
+
view.setUint8(9, 65);
|
|
5603
|
+
view.setUint8(10, 86);
|
|
5604
|
+
view.setUint8(11, 69);
|
|
5605
|
+
view.setUint8(12, 102);
|
|
5606
|
+
view.setUint8(13, 109);
|
|
5607
|
+
view.setUint8(14, 116);
|
|
5608
|
+
view.setUint8(15, 32);
|
|
5609
|
+
view.setUint32(16, 16, true);
|
|
5610
|
+
view.setUint16(20, 1, true);
|
|
5611
|
+
view.setUint16(22, channels, true);
|
|
5612
|
+
view.setUint32(24, sampleRate, true);
|
|
5613
|
+
view.setUint32(28, byteRate, true);
|
|
5614
|
+
view.setUint16(32, blockAlign, true);
|
|
5615
|
+
view.setUint16(34, bitsPerSample, true);
|
|
5616
|
+
view.setUint8(36, 100);
|
|
5617
|
+
view.setUint8(37, 97);
|
|
5618
|
+
view.setUint8(38, 116);
|
|
5619
|
+
view.setUint8(39, 97);
|
|
5620
|
+
view.setUint32(40, dataSize, true);
|
|
5621
|
+
new Uint8Array(buffer, 44).set(pcm);
|
|
5622
|
+
return new Uint8Array(buffer);
|
|
5623
|
+
}
|
|
5624
|
+
/**
 * Encode a byte sequence as a standard (padded) base64 string using
 * Node's Buffer.
 */
function bytesToBase64(bytes) {
  const asBuffer = Buffer.from(bytes);
  return asBuffer.toString("base64");
}
|
|
5627
|
+
/**
 * Decode a base64 string into a plain Uint8Array (a copy, not a Buffer).
 */
function base64ToBytes(b64) {
  const decoded = Buffer.from(b64, "base64");
  return new Uint8Array(decoded);
}
|
|
5587
5630
|
function assertSafeUrl(urlStr) {
|
|
5588
5631
|
let parsed;
|
|
5589
5632
|
try {
|
|
@@ -5623,6 +5666,56 @@ var OpenRouterMediaProvider = class {
|
|
|
5623
5666
|
});
|
|
5624
5667
|
}
|
|
5625
5668
|
apiKeyStore.set(this, key);
|
|
5669
|
+
modelMetaStore.set(this, /* @__PURE__ */ new Map());
|
|
5670
|
+
}
|
|
5671
|
+
/**
|
|
5672
|
+
* Seed the metadata cache for a model. Useful when running against test
|
|
5673
|
+
* servers that don't expose `GET /models/{id}/endpoints`, or when callers
|
|
5674
|
+
* already know the routing they want.
|
|
5675
|
+
*
|
|
5676
|
+
* Output modalities follow OpenRouter's convention — `["speech"]` for
|
|
5677
|
+
* TTS-only (Kokoro etc.), `["text","audio"]` for chat-audio (gpt-audio
|
|
5678
|
+
* family), `["video"]`, `["image"]`, etc.
|
|
5679
|
+
*/
|
|
5680
|
+
seedModelMeta(model, outputModalities, inputModalities = []) {
|
|
5681
|
+
const stripped = stripPrefix(model);
|
|
5682
|
+
const cache = modelMetaStore.get(this);
|
|
5683
|
+
cache.set(stripped, {
|
|
5684
|
+
outputModalities: [...outputModalities],
|
|
5685
|
+
inputModalities: [...inputModalities]
|
|
5686
|
+
});
|
|
5687
|
+
}
|
|
5688
|
+
/**
|
|
5689
|
+
* Fetch + cache OpenRouter model metadata so we can route requests to the
|
|
5690
|
+
* right endpoint. On any error returns an empty meta object so callers can
|
|
5691
|
+
* fall back to defaults.
|
|
5692
|
+
*/
|
|
5693
|
+
async fetchModelMeta(model) {
|
|
5694
|
+
const stripped = stripPrefix(model);
|
|
5695
|
+
const cache = modelMetaStore.get(this);
|
|
5696
|
+
const cached = cache.get(stripped);
|
|
5697
|
+
if (cached) return cached;
|
|
5698
|
+
const url = `${this.baseUrl}/models/${stripped}/endpoints`;
|
|
5699
|
+
try {
|
|
5700
|
+
const res = await this.get(url);
|
|
5701
|
+
if (!res.ok) {
|
|
5702
|
+
const meta2 = { outputModalities: [], inputModalities: [] };
|
|
5703
|
+
cache.set(stripped, meta2);
|
|
5704
|
+
return meta2;
|
|
5705
|
+
}
|
|
5706
|
+
const data = await res.json();
|
|
5707
|
+
const arch = data?.data?.architecture ?? {};
|
|
5708
|
+
const meta = {
|
|
5709
|
+
outputModalities: arch.output_modalities ?? [],
|
|
5710
|
+
inputModalities: arch.input_modalities ?? []
|
|
5711
|
+
};
|
|
5712
|
+
cache.set(stripped, meta);
|
|
5713
|
+
return meta;
|
|
5714
|
+
} catch {
|
|
5715
|
+
const meta = { outputModalities: [], inputModalities: [] };
|
|
5716
|
+
cache.set(stripped, meta);
|
|
5717
|
+
return meta;
|
|
5718
|
+
}
|
|
5626
5719
|
}
|
|
5627
5720
|
/** Prevent API key from leaking via JSON.stringify (CR-03). */
|
|
5628
5721
|
toJSON() {
|
|
@@ -5646,8 +5739,21 @@ var OpenRouterMediaProvider = class {
|
|
|
5646
5739
|
if (request.aspectRatio) body.aspect_ratio = request.aspectRatio;
|
|
5647
5740
|
if (request.generateAudio != null) body.generate_audio = request.generateAudio;
|
|
5648
5741
|
if (request.seed != null) body.seed = request.seed;
|
|
5649
|
-
if (request.
|
|
5650
|
-
if (request.
|
|
5742
|
+
if (request.imageUrl) body.image_url = request.imageUrl;
|
|
5743
|
+
if (request.frameImages) {
|
|
5744
|
+
body.frame_images = request.frameImages.map((fi) => ({
|
|
5745
|
+
type: fi.type ?? "image_url",
|
|
5746
|
+
image_url: fi.imageUrl,
|
|
5747
|
+
...fi.frameType ? { frame_type: fi.frameType } : {}
|
|
5748
|
+
}));
|
|
5749
|
+
}
|
|
5750
|
+
if (request.inputReferences) {
|
|
5751
|
+
body.input_references = request.inputReferences.map((ref) => ({
|
|
5752
|
+
type: ref.type ?? "image_url",
|
|
5753
|
+
image_url: ref.imageUrl
|
|
5754
|
+
}));
|
|
5755
|
+
}
|
|
5756
|
+
if (request.extra) Object.assign(body, request.extra);
|
|
5651
5757
|
const submitEndpoint = `${this.baseUrl}/videos`;
|
|
5652
5758
|
const submitRes = await this.post(submitEndpoint, body);
|
|
5653
5759
|
if (!submitRes.ok) {
|
|
@@ -5696,13 +5802,24 @@ var OpenRouterMediaProvider = class {
|
|
|
5696
5802
|
{ provider: "openrouter", model }
|
|
5697
5803
|
);
|
|
5698
5804
|
}
|
|
5805
|
+
const unsignedUrls = jobData.unsigned_urls;
|
|
5699
5806
|
const unsignedUrl = jobData.unsigned_url;
|
|
5700
5807
|
const signedUrl = jobData.url;
|
|
5701
|
-
const videoUrl = unsignedUrl ?? signedUrl;
|
|
5808
|
+
const videoUrl = unsignedUrls?.[0] ?? unsignedUrl ?? signedUrl;
|
|
5702
5809
|
let videoData;
|
|
5703
5810
|
if (videoUrl) {
|
|
5704
5811
|
assertSafeUrl(videoUrl);
|
|
5812
|
+
const downloadHeaders = {};
|
|
5813
|
+
try {
|
|
5814
|
+
const host = new URL(videoUrl).hostname.toLowerCase();
|
|
5815
|
+
if (host === "openrouter.ai" || host.endsWith(".openrouter.ai")) {
|
|
5816
|
+
const key = apiKeyStore.get(this);
|
|
5817
|
+
if (key) downloadHeaders.Authorization = `Bearer ${key}`;
|
|
5818
|
+
}
|
|
5819
|
+
} catch {
|
|
5820
|
+
}
|
|
5705
5821
|
const dlRes = await fetch(videoUrl, {
|
|
5822
|
+
headers: downloadHeaders,
|
|
5706
5823
|
signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT),
|
|
5707
5824
|
redirect: "error"
|
|
5708
5825
|
});
|
|
@@ -5727,15 +5844,43 @@ var OpenRouterMediaProvider = class {
|
|
|
5727
5844
|
// ── Image ──────────────────────────────────────────────────────────
|
|
5728
5845
|
async generateImage(request) {
|
|
5729
5846
|
const model = stripPrefix(request.model ?? "openai/gpt-image-1");
|
|
5730
|
-
|
|
5847
|
+
let userContent = request.prompt;
|
|
5848
|
+
if (request.imageUrls && request.imageUrls.length > 0) {
|
|
5849
|
+
userContent = [
|
|
5850
|
+
{ type: "text", text: request.prompt },
|
|
5851
|
+
...request.imageUrls.map((url) => ({
|
|
5852
|
+
type: "image_url",
|
|
5853
|
+
image_url: { url }
|
|
5854
|
+
}))
|
|
5855
|
+
];
|
|
5856
|
+
}
|
|
5857
|
+
const messages = [{ role: "user", content: userContent }];
|
|
5731
5858
|
const body = {
|
|
5732
5859
|
model,
|
|
5733
5860
|
messages,
|
|
5734
|
-
modalities: ["image"
|
|
5861
|
+
modalities: ["image"]
|
|
5735
5862
|
};
|
|
5736
5863
|
if (request.size) body.size = request.size;
|
|
5737
5864
|
if (request.quality) body.quality = request.quality;
|
|
5738
|
-
if (request.imageConfig)
|
|
5865
|
+
if (request.imageConfig) {
|
|
5866
|
+
const ic = request.imageConfig;
|
|
5867
|
+
const out = {};
|
|
5868
|
+
if (ic.aspectRatio) out.aspect_ratio = ic.aspectRatio;
|
|
5869
|
+
if (ic.imageSize) out.image_size = ic.imageSize;
|
|
5870
|
+
if (ic.strength != null) out.strength = ic.strength;
|
|
5871
|
+
if (ic.style) out.style = ic.style;
|
|
5872
|
+
if (ic.rgbColors) out.rgb_colors = ic.rgbColors;
|
|
5873
|
+
if (ic.backgroundRgbColor) out.background_rgb_color = ic.backgroundRgbColor;
|
|
5874
|
+
if (ic.superResolutionReferences) out.super_resolution_references = ic.superResolutionReferences;
|
|
5875
|
+
if (ic.fontInputs) {
|
|
5876
|
+
out.font_inputs = ic.fontInputs.map((fi) => ({
|
|
5877
|
+
font_url: fi.fontUrl,
|
|
5878
|
+
text: fi.text
|
|
5879
|
+
}));
|
|
5880
|
+
}
|
|
5881
|
+
body.image_config = out;
|
|
5882
|
+
}
|
|
5883
|
+
if (request.extra) Object.assign(body, request.extra);
|
|
5739
5884
|
const endpoint = `${this.baseUrl}/chat/completions`;
|
|
5740
5885
|
const res = await this.post(endpoint, body);
|
|
5741
5886
|
if (!res.ok) {
|
|
@@ -5746,6 +5891,15 @@ var OpenRouterMediaProvider = class {
|
|
|
5746
5891
|
}
|
|
5747
5892
|
const data = await res.json();
|
|
5748
5893
|
const resp = emptyMediaResponse(data);
|
|
5894
|
+
const pushImageFromUrl = (url) => {
|
|
5895
|
+
if (!url) return;
|
|
5896
|
+
if (url.startsWith("data:")) {
|
|
5897
|
+
const b64 = url.split(",", 2)[1];
|
|
5898
|
+
resp.images.push({ url, b64Json: b64 });
|
|
5899
|
+
} else {
|
|
5900
|
+
resp.images.push({ url });
|
|
5901
|
+
}
|
|
5902
|
+
};
|
|
5749
5903
|
const choices = data.choices;
|
|
5750
5904
|
if (choices) {
|
|
5751
5905
|
for (const choice of choices) {
|
|
@@ -5761,16 +5915,17 @@ var OpenRouterMediaProvider = class {
|
|
|
5761
5915
|
resp.text += p.text;
|
|
5762
5916
|
} else if (p.type === "image_url") {
|
|
5763
5917
|
const imgUrl = p.image_url;
|
|
5764
|
-
|
|
5765
|
-
if (url?.startsWith("data:")) {
|
|
5766
|
-
const b64 = url.split(",", 2)[1];
|
|
5767
|
-
resp.images.push({ url, b64Json: b64 });
|
|
5768
|
-
} else if (url) {
|
|
5769
|
-
resp.images.push({ url });
|
|
5770
|
-
}
|
|
5918
|
+
pushImageFromUrl(imgUrl?.url);
|
|
5771
5919
|
}
|
|
5772
5920
|
}
|
|
5773
5921
|
}
|
|
5922
|
+
const images = msg.images;
|
|
5923
|
+
if (Array.isArray(images)) {
|
|
5924
|
+
for (const img of images) {
|
|
5925
|
+
const imgUrl = img.image_url;
|
|
5926
|
+
pushImageFromUrl(imgUrl?.url);
|
|
5927
|
+
}
|
|
5928
|
+
}
|
|
5774
5929
|
}
|
|
5775
5930
|
}
|
|
5776
5931
|
return resp;
|
|
@@ -5778,6 +5933,20 @@ var OpenRouterMediaProvider = class {
|
|
|
5778
5933
|
// ── Audio ──────────────────────────────────────────────────────────
|
|
5779
5934
|
async generateAudio(request) {
|
|
5780
5935
|
const model = stripPrefix(request.model ?? "openai/gpt-4o-mini-tts");
|
|
5936
|
+
const requestedFormat = request.format ?? "wav";
|
|
5937
|
+
const meta = await this.fetchModelMeta(model);
|
|
5938
|
+
const outMods = meta.outputModalities;
|
|
5939
|
+
const useSpeechEndpoint = outMods.includes("speech") || outMods.length === 0 || !outMods.includes("audio");
|
|
5940
|
+
if (useSpeechEndpoint) {
|
|
5941
|
+
return this.generateAudioViaSpeechEndpoint(
|
|
5942
|
+
model,
|
|
5943
|
+
request.text,
|
|
5944
|
+
request.voice ?? "alloy",
|
|
5945
|
+
requestedFormat,
|
|
5946
|
+
request
|
|
5947
|
+
);
|
|
5948
|
+
}
|
|
5949
|
+
const wireFormat = requestedFormat === "wav" ? "pcm16" : requestedFormat;
|
|
5781
5950
|
const messages = [{ role: "user", content: request.text }];
|
|
5782
5951
|
const body = {
|
|
5783
5952
|
model,
|
|
@@ -5786,7 +5955,7 @@ var OpenRouterMediaProvider = class {
|
|
|
5786
5955
|
stream: true,
|
|
5787
5956
|
audio: {
|
|
5788
5957
|
voice: request.voice ?? "alloy",
|
|
5789
|
-
format:
|
|
5958
|
+
format: wireFormat
|
|
5790
5959
|
}
|
|
5791
5960
|
};
|
|
5792
5961
|
const endpoint = `${this.baseUrl}/chat/completions`;
|
|
@@ -5877,13 +6046,65 @@ var OpenRouterMediaProvider = class {
|
|
|
5877
6046
|
const resp = emptyMediaResponse(null);
|
|
5878
6047
|
resp.text = textContent;
|
|
5879
6048
|
if (audioChunks.length > 0) {
|
|
6049
|
+
let b64 = audioChunks.join("");
|
|
6050
|
+
try {
|
|
6051
|
+
const parts = audioChunks.map(base64ToBytes);
|
|
6052
|
+
const total = parts.reduce((n, p) => n + p.byteLength, 0);
|
|
6053
|
+
const merged = new Uint8Array(total);
|
|
6054
|
+
let off = 0;
|
|
6055
|
+
for (const p of parts) {
|
|
6056
|
+
merged.set(p, off);
|
|
6057
|
+
off += p.byteLength;
|
|
6058
|
+
}
|
|
6059
|
+
b64 = bytesToBase64(merged);
|
|
6060
|
+
if (requestedFormat === "wav") {
|
|
6061
|
+
b64 = bytesToBase64(wrapPcm16AsWav(merged));
|
|
6062
|
+
}
|
|
6063
|
+
} catch {
|
|
6064
|
+
}
|
|
5880
6065
|
resp.audio = {
|
|
5881
|
-
data:
|
|
5882
|
-
format:
|
|
6066
|
+
data: b64,
|
|
6067
|
+
format: requestedFormat
|
|
5883
6068
|
};
|
|
5884
6069
|
}
|
|
5885
6070
|
return resp;
|
|
5886
6071
|
}
|
|
6072
|
+
/**
|
|
6073
|
+
* Call OpenRouter's OpenAI-compatible TTS endpoint (`POST /audio/speech`).
|
|
6074
|
+
* Returns raw bytes for the requested format; wraps PCM → WAV when needed.
|
|
6075
|
+
*/
|
|
6076
|
+
async generateAudioViaSpeechEndpoint(model, text2, voice, requestedFormat, request) {
|
|
6077
|
+
const wireFormat = requestedFormat === "wav" || requestedFormat === "pcm" || requestedFormat === "pcm16" ? "pcm" : requestedFormat;
|
|
6078
|
+
const endpoint = `${this.baseUrl}/audio/speech`;
|
|
6079
|
+
const body = {
|
|
6080
|
+
model,
|
|
6081
|
+
input: text2,
|
|
6082
|
+
voice,
|
|
6083
|
+
response_format: wireFormat
|
|
6084
|
+
};
|
|
6085
|
+
if (request?.speed != null) body.speed = request.speed;
|
|
6086
|
+
if (request?.extra) Object.assign(body, request.extra);
|
|
6087
|
+
const res = await this.post(endpoint, body);
|
|
6088
|
+
if (!res.ok) {
|
|
6089
|
+
throw new MediaProviderError(
|
|
6090
|
+
`Audio generation failed [model=${model}] [endpoint=${endpoint}]: ${res.status} ${await res.text()}`,
|
|
6091
|
+
{ provider: "openrouter", model, endpoint }
|
|
6092
|
+
);
|
|
6093
|
+
}
|
|
6094
|
+
const buf = new Uint8Array(await res.arrayBuffer());
|
|
6095
|
+
const finalBytes = requestedFormat === "wav" ? wrapPcm16AsWav(buf) : buf;
|
|
6096
|
+
const resp = emptyMediaResponse({
|
|
6097
|
+
endpoint: "audio/speech",
|
|
6098
|
+
model,
|
|
6099
|
+
mime_type: res.headers.get("content-type") ?? ""
|
|
6100
|
+
});
|
|
6101
|
+
resp.text = text2;
|
|
6102
|
+
resp.audio = {
|
|
6103
|
+
data: bytesToBase64(finalBytes),
|
|
6104
|
+
format: requestedFormat
|
|
6105
|
+
};
|
|
6106
|
+
return resp;
|
|
6107
|
+
}
|
|
5887
6108
|
// ── Helpers ────────────────────────────────────────────────────────
|
|
5888
6109
|
post(url, body) {
|
|
5889
6110
|
const key = apiKeyStore.get(this);
|