@compose-market/sdk 0.6.94 → 0.6.95

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/.speakeasy/a2a.arazzo.yaml +1 -1
  2. package/.speakeasy/memory.arazzo.yaml +1 -1
  3. package/.speakeasy/tests.arazzo.yaml +1 -1
  4. package/CHANGELOG.md +16 -0
  5. package/README.md +3 -1
  6. package/dist/index.d.ts +1 -1
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/resources/inference.d.ts +6 -1
  10. package/dist/resources/inference.d.ts.map +1 -1
  11. package/dist/resources/inference.js +466 -0
  12. package/dist/resources/inference.js.map +1 -1
  13. package/dist/types/index.d.ts +71 -0
  14. package/dist/types/index.d.ts.map +1 -1
  15. package/dist/version.d.ts +1 -1
  16. package/dist/version.js +1 -1
  17. package/generated/inference/esm/lib/config.d.ts +3 -3
  18. package/generated/inference/esm/lib/config.js +3 -3
  19. package/generated/inference/package.json +1 -1
  20. package/generated/inference/src/lib/config.ts +3 -3
  21. package/generated/manowar/esm/lib/config.d.ts +3 -3
  22. package/generated/manowar/esm/lib/config.js +3 -3
  23. package/generated/manowar/package.json +1 -1
  24. package/generated/manowar/src/lib/config.ts +3 -3
  25. package/generated/memory/esm/lib/config.d.ts +3 -3
  26. package/generated/memory/esm/lib/config.js +3 -3
  27. package/generated/memory/package.json +1 -1
  28. package/generated/memory/src/lib/config.ts +3 -3
  29. package/generated/x402/esm/lib/config.d.ts +3 -3
  30. package/generated/x402/esm/lib/config.js +3 -3
  31. package/generated/x402/package.json +1 -1
  32. package/generated/x402/src/lib/config.ts +3 -3
  33. package/package.json +1 -1
  34. package/specs/inference.openapi.yaml +1 -1
  35. package/specs/manowar.openapi.yaml +1 -1
  36. package/specs/memory.openapi.yaml +1 -1
  37. package/specs/x402.openapi.yaml +1 -1
@@ -952,10 +952,355 @@ async function* streamVideoStatus(client, ctx, videoId, opts) {
952
952
  catch { /* best-effort */ }
953
953
  }
954
954
  }
955
/**
 * Normalizes a value for case-insensitive comparison.
 *
 * @param {unknown} value - Anything; only strings are meaningful.
 * @returns {string} The trimmed, lower-cased string, or "" for non-strings.
 */
function lower(value) {
  if (typeof value !== "string") {
    return "";
  }
  return value.trim().toLowerCase();
}
958
/**
 * Turns a single value or an array of values into a list of normalized
 * (trimmed, lower-cased) non-empty strings.
 *
 * @param {unknown} value - A string, an array of strings, or anything else.
 * @returns {string[]} Normalized tokens; entries that normalize to "" are dropped.
 */
function list(value) {
  const candidates = Array.isArray(value) ? value : [value];
  return candidates
    .map((entry) => lower(entry))
    .filter(Boolean);
}
965
/**
 * Extracts the usable operation descriptors from a model-info record.
 *
 * @param {object|null|undefined} info - Model info; may be absent.
 * @returns {object[]} Entries of `info.operations` that are objects with a
 *   string `modality` and a string `operation`; [] when there is no array.
 */
function ops(info) {
  const declared = info?.operations;
  if (!Array.isArray(declared)) {
    return [];
  }
  const usable = [];
  for (const entry of declared) {
    if (!entry || typeof entry !== "object") {
      continue;
    }
    if (typeof entry.modality !== "string" || typeof entry.operation !== "string") {
      continue;
    }
    usable.push(entry);
  }
  return usable;
}
975
/**
 * Reads the declared input and output kinds of a model.
 *
 * @param {object|null|undefined} info - Model info; may be absent.
 * @returns {{input: string[], output: string[]}} Normalized kind lists.
 */
function fields(info) {
  const input = list(info?.input);
  const output = list(info?.output);
  return { input, output };
}
981
/**
 * Inspects one piece of request content and records which media kinds it
 * carries by setting flags on `signal`. Recurses into arrays and into the
 * `content`, `input`, `messages`, `attachments` and `attachment` properties
 * of records.
 *
 * @param {unknown} value - String, Blob, byte array, array, or record.
 * @param {{text: boolean, image: boolean, audio: boolean, video: boolean}} signal
 *   Accumulator that is mutated in place; flags are only ever set to true.
 * @returns {void}
 */
function part(value, signal) {
  // Bare strings count as text only when they contain non-whitespace.
  if (typeof value === "string") {
    if (value.trim()) {
      signal.text = true;
    }
    return;
  }
  if (!value || typeof value !== "object") {
    return;
  }

  const families = [
    { flag: "audio", prefix: "audio/", extension: /\.(mp3|m4a|wav|ogg|opus|flac|aac)(?:[?#].*)?$/ },
    { flag: "image", prefix: "image/", extension: /\.(png|jpe?g|gif|webp|avif|heic|svg)(?:[?#].*)?$/ },
    { flag: "video", prefix: "video/", extension: /\.(mp4|mov|webm|mkv|avi)(?:[?#].*)?$/ },
  ];

  // Blobs are classified by their MIME type alone; at most one flag is set.
  if (typeof Blob !== "undefined" && value instanceof Blob) {
    const blobType = lower(value.type);
    const family = families.find(({ prefix }) => blobType.startsWith(prefix));
    if (family) {
      signal[family.flag] = true;
    }
    return;
  }

  // Raw bytes carry no type information and are treated as audio.
  if (isByteArray(value)) {
    signal.audio = true;
    return;
  }

  if (Array.isArray(value)) {
    for (const entry of value) {
      part(entry, signal);
    }
    return;
  }

  // Generic record: look at its declared type, MIME type and URL extension.
  const kind = lower(value.type ?? value.kind ?? value.mediaType ?? value.media_type);
  const mime = lower(value.mimeType ?? value.mime_type ?? value.contentType ?? value.content_type);
  const link = lower(value.url ?? value.uri ?? value.href ?? value.image_url ?? value.audio_url ?? value.video_url);
  for (const { flag, prefix, extension } of families) {
    if (kind.includes(flag) || mime.startsWith(prefix) || extension.test(link)) {
      signal[flag] = true;
    }
  }
  if (typeof value.text === "string" || typeof value.content === "string") {
    signal.text = true;
  }
  for (const key of ["content", "input", "messages", "attachments", "attachment"]) {
    const nested = value[key];
    if (nested !== undefined) {
      part(nested, signal);
    }
  }
}
1023
/**
 * Builds the media-kind signal for a whole inference request.
 *
 * @param {object} input - Request input; `prompt`, `input`, `file`,
 *   `messages`, `attachments` and `attachment` are inspected.
 * @returns {{text: boolean, image: boolean, audio: boolean, video: boolean}}
 *   Which kinds of content the request carries.
 */
function sig(input) {
  const hasText = Boolean(input.prompt || typeof input.input === "string");
  // A `file` is taken as audio without further inspection.
  const hasAudio = Boolean(input.file);
  const signal = { text: hasText, image: false, audio: hasAudio, video: false };
  for (const source of ["input", "messages", "attachments", "attachment"]) {
    part(input[source], signal);
  }
  return signal;
}
1036
/**
 * Returns the first element of a list.
 *
 * @param {Array} values - List to read from.
 * @returns {*} `values[0]`, or undefined when the list is empty.
 */
function first(values) {
  if (values.length > 0) {
    return values[0];
  }
  return undefined;
}
1039
/**
 * Chooses the modality/operation pair for a request.
 *
 * Resolution order:
 *   1. an explicitly requested operation found in the model's catalog
 *      (preferring the requested modality), or the raw request when a
 *      modality was also given;
 *   2. the requested modality, when the catalog has exactly one distinct
 *      operation for it;
 *   3. the only catalog entry, when there is just one;
 *   4. the single catalog entry whose inputs match the request's content;
 *   5. a guess from the model's declared input/output kinds.
 *
 * @param {object} input - Request input (`operation`, `modality`, content).
 * @param {object|null|undefined} info - Model info record.
 * @returns {{modality: string, operation: string}} The selected pair.
 */
function op(input, info) {
  const requestedOperation = lower(input.operation);
  const requestedModality = input.modality;
  const catalog = ops(info);
  const signal = sig(input);

  const toPlan = (entry) => ({ modality: entry.modality, operation: entry.operation });
  // Yields a plan only when the entries collapse to one distinct
  // modality:operation key.
  const sole = (entries) => {
    const byKey = new Map(entries.map((item) => [`${item.modality}:${item.operation}`, item]));
    const distinct = [...byKey.values()];
    return distinct.length === 1 ? toPlan(distinct[0]) : undefined;
  };

  if (requestedOperation) {
    const sameOperation = catalog.filter((item) => lower(item.operation) === requestedOperation);
    const sameModality = requestedModality
      ? sameOperation.filter((item) => item.modality === requestedModality)
      : sameOperation;
    const chosen = sameModality[0] ?? sameOperation[0];
    if (chosen) {
      return toPlan(chosen);
    }
    if (requestedModality) {
      return { modality: requestedModality, operation: requestedOperation };
    }
  }

  if (requestedModality) {
    const only = sole(catalog.filter((item) => item.modality === requestedModality));
    if (only) {
      return only;
    }
  }

  if (catalog.length === 1) {
    return toPlan(catalog[0]);
  }

  if (catalog.length > 1) {
    const present = ["audio", "image", "video", "text"].filter((kind) => signal[kind]);
    const compatible = catalog.filter((item) => {
      const accepted = list(item.input);
      return present.some((kind) => accepted.includes(kind));
    });
    const only = sole(compatible);
    if (only) {
      return only;
    }
  }

  // Nothing in the catalog decided it: infer from the declared input/output kinds.
  const { input: accepts, output: produces } = fields(info);
  if (produces.includes("embedding")) {
    return { modality: "embedding", operation: "embedding" };
  }
  if (produces.includes("video")) {
    return { modality: "video", operation: accepts.includes("image") ? "image-to-video" : "text-to-video" };
  }
  if (produces.includes("image")) {
    return { modality: "image", operation: accepts.includes("image") ? "image-to-image" : "text-to-image" };
  }
  if (produces.includes("audio")) {
    const audioOnlyInput = accepts.includes("audio") && !accepts.includes("text");
    return { modality: "audio", operation: audioOnlyInput ? "speech-to-speech" : "text-to-speech" };
  }
  if (accepts.includes("audio") && produces.includes("text")) {
    return { modality: "audio", operation: "speech-to-text" };
  }
  return { modality: "text", operation: "text-generation" };
}
1089
/**
 * Maps a selected modality/operation onto the endpoint that serves it.
 *
 * @param {{modality: string, operation: string}} plan - Selected pair.
 * @param {object} input - Request input; `file`, `messages` and the content
 *   signal influence the choice.
 * @returns {"embeddings"|"audio-transcriptions"|"audio-speech"|"videos"|"images"|"chat"|"responses"}
 */
function endpoint(plan, input) {
  const name = lower(plan.operation);
  const signal = sig(input);
  // Transcription needs an uploaded file; otherwise fall back to responses.
  const transcriptionTarget = () => (input.file ? "audio-transcriptions" : "responses");

  if (plan.modality === "embedding" || name.includes("embedding")) {
    return "embeddings";
  }

  const isRecognition = name === "speech-to-text"
    || name.includes("transcription")
    || name.includes("speech-recognition");
  if (isRecognition) {
    return transcriptionTarget();
  }

  const synthesisNames = new Set([
    "text-to-speech",
    "text-to-audio",
    "music-generation",
    "sound-effects",
    "text-to-sound-effects",
  ]);
  if (synthesisNames.has(name)) {
    return "audio-speech";
  }

  switch (plan.modality) {
    case "audio":
      // Unnamed audio operations are routed by what the request contains.
      if (signal.audio && !signal.text) {
        return transcriptionTarget();
      }
      if (signal.text && !signal.audio) {
        return "audio-speech";
      }
      return "responses";
    case "video":
      return "videos";
    case "image":
      return "images";
    default:
      return input.messages ? "chat" : "responses";
  }
}
1117
/**
 * Extracts the plain-text prompt from a request.
 *
 * Precedence: a string `prompt`, then a string `input`, then the text found
 * in `messages` joined with newlines. Message content may be a string or an
 * array of parts; for arrays, string parts and the string `text` field of
 * object parts are used. Null or malformed messages are skipped instead of
 * throwing.
 *
 * @param {object} input - Request input.
 * @returns {string} The prompt text, or "" when none is present.
 */
function text(input) {
  if (typeof input.prompt === "string") {
    return input.prompt;
  }
  if (typeof input.input === "string") {
    return input.input;
  }
  if (!Array.isArray(input.messages)) {
    return "";
  }

  // Pulls the text out of one message's content (string or parts array).
  const fromContent = (content) => {
    if (typeof content === "string") {
      return content;
    }
    if (!Array.isArray(content)) {
      return "";
    }
    return content
      .map((piece) => {
        if (typeof piece === "string") {
          return piece;
        }
        return typeof piece?.text === "string" ? piece.text : "";
      })
      .filter(Boolean)
      .join("\n");
  };

  return input.messages
    .map((message) => fromContent(message?.content))
    .filter(Boolean)
    .join("\n");
}
1130
/**
 * Collects the caller's extra request parameters.
 *
 * @param {object} input - Request input with optional `params` and
 *   `customParams`.
 * @returns {object} A shallow copy of `params`, plus `custom_params` when
 *   `customParams` is truthy (it overrides a same-named key from `params`).
 */
function merge(input) {
  // Spreading null/undefined contributes nothing, so no fallback is needed.
  const merged = { ...input.params };
  if (input.customParams) {
    merged.custom_params = input.customParams;
  }
  return merged;
}
1136
/**
 * Picks the attachment fields that should be forwarded with a request.
 *
 * @param {object} input - Request input.
 * @returns {object} An object holding `attachments` and/or `attachment`,
 *   each present only when the corresponding input value is truthy.
 */
function attach(input) {
  const forwarded = {};
  for (const key of ["attachments", "attachment"]) {
    const supplied = input[key];
    if (supplied) {
      forwarded[key] = supplied;
    }
  }
  return forwarded;
}
1142
/**
 * Finds the URL of the first attachment of a given media kind.
 *
 * `input.attachment` is checked first, then `input.attachments`. The latter
 * may be an array or a single record; a single record is treated as a
 * one-element list instead of being spread (which would throw for a plain
 * object). An attachment matches when its `type`/`kind` mentions the kind or
 * its MIME type (`mimeType`, `mime_type`, `contentType`, `content_type`)
 * starts with `<kind>/`.
 *
 * @param {object} input - Request input.
 * @param {"image"|"audio"|"video"} kind - Media kind to look for.
 * @returns {string|undefined} The attachment's URL, or undefined if none matches.
 */
function url(input, kind) {
  const extra = input.attachments;
  const many = Array.isArray(extra) ? extra : extra == null ? [] : [extra];
  for (const value of [input.attachment, ...many]) {
    // Strings and other primitives carry no type information to match on.
    if (!value || typeof value !== "object") {
      continue;
    }
    const type = lower(value.type ?? value.kind);
    const mime = lower(value.mimeType ?? value.mime_type ?? value.contentType ?? value.content_type);
    // The kind-specific key wins over the generic `url` / `uri` keys.
    const direct = kind === "image" ? value.image_url : kind === "audio" ? value.audio_url : value.video_url;
    const candidate = [direct, value.url, value.uri].find((entry) => typeof entry === "string");
    if (candidate && (type.includes(kind) || mime.startsWith(`${kind}/`))) {
      return candidate;
    }
  }
  return undefined;
}
1163
/**
 * Finds the first output item whose `type` mentions the given kind.
 *
 * Tolerates payloads without an `output` array and null entries inside it,
 * returning undefined rather than throwing.
 *
 * @param {object|null|undefined} data - Response payload.
 * @param {string} kind - Lower-case kind to look for, e.g. "image".
 * @returns {object|undefined} The matching output item, if any.
 */
function firstOutput(data, kind) {
  const items = data?.output;
  if (!Array.isArray(items)) {
    return undefined;
  }
  return items.find((item) => lower(item?.type).includes(kind));
}
1166
/**
 * Splits a media reference into either a plain URL or its base64 payload.
 *
 * Base64 `data:` URLs are decomposed into `mimeType` (when declared) and
 * `base64`. Media-type parameters between the type and the `;base64` marker
 * (e.g. `data:audio/wav;codecs=1;base64,...`) are accepted and dropped from
 * `mimeType`. Anything else is returned unchanged as `url`.
 *
 * @param {string|undefined} value - URL or data URL.
 * @returns {{url?: string, mimeType?: string, base64?: string}} Parsed parts;
 *   {} for empty or non-string input.
 */
function dataurl(value) {
  if (!value || typeof value !== "string") {
    return {};
  }
  // Groups: 1 = media type (may be empty), 2 = base64 payload.
  const match = value.match(/^data:([^;,]*)(?:;[^;,]*)*?;base64,(.*)$/i);
  if (!match) {
    return { url: value };
  }
  const [, mimeType, base64] = match;
  return {
    ...(mimeType ? { mimeType } : {}),
    base64,
  };
}
1178
/**
 * Builds the fields shared by every result shape.
 *
 * @param {object} plan - The resolved inference plan.
 * @param {object} value - Raw call result to copy metadata from.
 * @returns {{plan: object, receipt: *, requestId: *, budget: *, sessionInvalidReason: *}}
 */
function common(plan, value) {
  const { receipt, requestId, budget, sessionInvalidReason } = value;
  return { plan, receipt, requestId, budget, sessionInvalidReason };
}
1187
/**
 * Shapes a chat-completions result as a text result.
 *
 * @param {object} plan - The resolved inference plan.
 * @param {object} value - Call result whose `data.choices` holds the reply.
 * @returns {object} Text result; `text` is the first choice's message
 *   content, or "" when that is null/undefined.
 */
function chat(plan, value) {
  const { data } = value;
  const reply = data.choices[0]?.message?.content;
  return {
    type: "text",
    ...common(plan, value),
    text: reply ?? "",
    data,
  };
}
1195
/**
 * Shapes a responses-endpoint result according to the plan's modality.
 *
 * @param {object} plan - The resolved inference plan; `modality` selects the
 *   result shape.
 * @param {object} value - Call result; `data.output` holds the output items.
 * @returns {object} An embedding, image, audio, video or text result.
 */
function response(plan, value) {
  const { data } = value;
  const stringOrNothing = (candidate) => (typeof candidate === "string" ? candidate : undefined);

  switch (plan.modality) {
    case "embedding": {
      // Keep only outputs that really are numeric vectors.
      const isVector = (item) => Array.isArray(item) && item.every((entry) => typeof entry === "number");
      const embeddings = data.output
        .map((item) => item.embedding)
        .filter(isVector);
      return { type: "embedding", ...common(plan, value), embeddings, data };
    }
    case "image": {
      const produced = firstOutput(data, "image");
      const media = dataurl(stringOrNothing(produced?.image_url));
      return { type: "image", ...common(plan, value), ...media, data };
    }
    case "audio": {
      const produced = firstOutput(data, "audio");
      const media = dataurl(stringOrNothing(produced?.audio_url));
      return { type: "audio", ...common(plan, value), ...media, data };
    }
    case "video": {
      const produced = firstOutput(data, "video");
      const media = dataurl(stringOrNothing(produced?.video_url));
      // Only a plain URL is surfaced for video; a base64 payload is not.
      return {
        type: "video",
        ...common(plan, value),
        jobId: stringOrNothing(data.job_id),
        status: data.status,
        url: media.url,
        data,
      };
    }
    default: {
      const produced = firstOutput(data, "text");
      return {
        type: "text",
        ...common(plan, value),
        text: typeof produced?.text === "string" ? produced.text : "",
        data,
      };
    }
  }
}
1232
/**
 * Shapes an images-endpoint result as an image result.
 *
 * @param {object} plan - The resolved inference plan.
 * @param {object} value - Call result; the first entry of `data.data` is used.
 * @returns {object} Image result with `url` and/or `base64`; `mimeType` is
 *   "image/png" whenever base64 data is present.
 */
function image(plan, value) {
  const { data } = value;
  const { url: link, b64_json: encoded } = data.data[0] ?? {};
  return {
    type: "image",
    ...common(plan, value),
    url: link,
    base64: encoded,
    mimeType: encoded ? "image/png" : undefined,
    data,
  };
}
1243
/**
 * Shapes an embeddings-endpoint result as an embedding result.
 *
 * @param {object} plan - The resolved inference plan.
 * @param {object} value - Call result; each entry of `data.data` carries an
 *   `embedding`.
 * @returns {object} Embedding result with one vector per entry.
 */
function embedding(plan, value) {
  const { data } = value;
  return {
    type: "embedding",
    ...common(plan, value),
    embeddings: data.data.map(({ embedding: vector }) => vector),
    data,
  };
}
1251
/**
 * Shapes an audio-transcription result as a text result.
 *
 * @param {object} plan - The resolved inference plan.
 * @param {object} value - Call result; `data.text` holds the transcript.
 * @returns {object} Text result.
 */
function transcript(plan, value) {
  const { data } = value;
  const shared = common(plan, value);
  return { type: "text", ...shared, text: data.text, data };
}
1259
/**
 * Reads the job identifier from a video payload.
 *
 * Prefers a string `job_id`, then a string `id`. Null, undefined and
 * primitive payloads yield undefined instead of throwing (the `in` operator
 * used previously rejects non-object operands).
 *
 * @param {object|null|undefined} data - Video creation/status payload.
 * @returns {string|undefined} The identifier, if one is present.
 */
function videoid(data) {
  return [data?.job_id, data?.id].find((candidate) => typeof candidate === "string");
}
1266
/**
 * Shapes a video-generation result, optionally merged with the final status
 * obtained by waiting for the job.
 *
 * @param {object} plan - The resolved inference plan.
 * @param {object} value - Result of the video creation call.
 * @param {object|null|undefined} final - Final job status, when the caller
 *   waited for completion; its `status` and `url` take precedence.
 * @returns {object} Video result.
 */
function video(plan, value, final) {
  const { data } = value;
  const firstVideo = data.data?.[0];
  const status = final?.status ?? data.status;
  const link = final?.url ?? firstVideo?.url;
  return {
    type: "video",
    ...common(plan, value),
    jobId: videoid(data),
    status,
    url: link,
    final,
    data,
  };
}
1278
/**
 * Shapes an audio-speech result, exposing the audio in several forms.
 *
 * The body is read from a clone so the original `response` stays unread.
 * `blob` and `objectUrl` are only produced where `Blob` and
 * `URL.createObjectURL` exist. The object URL is not revoked here.
 *
 * @param {object} plan - The resolved inference plan.
 * @param {object} value - Call result holding the raw `response`.
 * @returns {Promise<object>} Audio result with `mimeType`, `blob`,
 *   `objectUrl`, `arrayBuffer`, `base64` and the original `response`.
 */
async function speech(plan, value) {
  const { response } = value;
  const arrayBuffer = await response.clone().arrayBuffer();
  // Drop any parameters after the media type; default when it is absent.
  const declaredType = response.headers.get("content-type")?.split(";")[0]?.trim();
  const mimeType = declaredType || "audio/mpeg";
  const bytes = new Uint8Array(arrayBuffer);

  let blob;
  let objectUrl;
  if (typeof Blob !== "undefined") {
    blob = new Blob([bytes], { type: mimeType });
    if (typeof URL !== "undefined" && typeof URL.createObjectURL === "function") {
      objectUrl = URL.createObjectURL(blob);
    }
  }

  return {
    type: "audio",
    ...common(plan, value),
    mimeType,
    blob,
    objectUrl,
    arrayBuffer,
    base64: bytesToBase64(bytes),
    response,
  };
}
955
1298
  // ---------------------------------------------------------------------------
956
1299
  // Top-level inference resource
957
1300
  // ---------------------------------------------------------------------------
958
1301
  export class InferenceResource {
1302
+ client;
1303
+ ctx;
959
1304
  chat;
960
1305
  responses;
961
1306
  embeddings;
@@ -963,6 +1308,8 @@ export class InferenceResource {
963
1308
  audio;
964
1309
  videos;
965
1310
  constructor(client, ctx) {
1311
+ this.client = client;
1312
+ this.ctx = ctx;
966
1313
  this.chat = { completions: new ChatCompletionsNamespace(client, ctx) };
967
1314
  this.responses = new ResponsesNamespace(client, ctx);
968
1315
  this.embeddings = new EmbeddingsNamespace(client, ctx);
@@ -970,5 +1317,124 @@ export class InferenceResource {
970
1317
  this.audio = new AudioNamespace(client, ctx);
971
1318
  this.videos = new VideosNamespace(client, ctx);
972
1319
  }
1320
+ async plan(input, options) {
1321
+ const info = input.modelInfo ?? await this.client.request({
1322
+ method: "GET",
1323
+ path: `/v1/models/${encodeURIComponent(input.model)}`,
1324
+ headers: buildCallHeaders(options, this.ctx.getWalletMaybe(), this.ctx.getTokenMaybe()),
1325
+ signal: options?.signal,
1326
+ timeoutMs: options?.timeoutMs,
1327
+ });
1328
+ const selected = op(input, info);
1329
+ const plan = {
1330
+ model: input.model,
1331
+ ...(input.provider ?? info.provider ? { provider: input.provider ?? info.provider } : {}),
1332
+ modality: selected.modality,
1333
+ operation: selected.operation,
1334
+ endpoint: endpoint(selected, input),
1335
+ };
1336
+ return plan;
1337
+ }
1338
+ async run(input, options) {
1339
+ const plan = await this.plan(input, options);
1340
+ const extra = merge(input);
1341
+ const attachments = attach(input);
1342
+ const prompt = text(input);
1343
+ if (plan.endpoint === "chat") {
1344
+ if (!input.messages)
1345
+ throw new BadRequestError({ message: "messages are required for chat inference" });
1346
+ return chat(plan, await this.chat.completions.create({
1347
+ model: input.model,
1348
+ messages: input.messages,
1349
+ provider: input.provider,
1350
+ ...attachments,
1351
+ ...extra,
1352
+ }, options));
1353
+ }
1354
+ if (plan.endpoint === "embeddings") {
1355
+ const embeddingInput = Array.isArray(input.input) && input.input.every((item) => typeof item === "string")
1356
+ ? input.input
1357
+ : prompt;
1358
+ if (!embeddingInput || (Array.isArray(embeddingInput) && embeddingInput.length === 0)) {
1359
+ throw new BadRequestError({ message: "input is required for embedding inference" });
1360
+ }
1361
+ return embedding(plan, await this.embeddings.create({
1362
+ model: input.model,
1363
+ input: embeddingInput,
1364
+ provider: input.provider,
1365
+ ...attachments,
1366
+ ...extra,
1367
+ }, options));
1368
+ }
1369
+ if (plan.endpoint === "images") {
1370
+ if (!prompt)
1371
+ throw new BadRequestError({ message: "prompt is required for image inference" });
1372
+ const imageUrl = url(input, "image");
1373
+ const body = {
1374
+ model: input.model,
1375
+ prompt,
1376
+ provider: input.provider,
1377
+ ...(imageUrl ? { image: imageUrl } : {}),
1378
+ ...attachments,
1379
+ ...extra,
1380
+ };
1381
+ return image(plan, imageUrl
1382
+ ? await this.images.edit(body, options)
1383
+ : await this.images.generate(body, options));
1384
+ }
1385
+ if (plan.endpoint === "audio-speech") {
1386
+ if (!prompt)
1387
+ throw new BadRequestError({ message: "input text is required for audio generation" });
1388
+ return speech(plan, await this.audio.speech({
1389
+ model: input.model,
1390
+ input: prompt,
1391
+ provider: input.provider,
1392
+ ...attachments,
1393
+ ...extra,
1394
+ }, options));
1395
+ }
1396
+ if (plan.endpoint === "audio-transcriptions") {
1397
+ if (!input.file)
1398
+ throw new BadRequestError({ message: "file is required for audio transcription" });
1399
+ return transcript(plan, await this.audio.transcriptions({
1400
+ model: input.model,
1401
+ file: input.file,
1402
+ provider: input.provider,
1403
+ ...attachments,
1404
+ ...extra,
1405
+ }, options));
1406
+ }
1407
+ if (plan.endpoint === "videos") {
1408
+ if (!prompt)
1409
+ throw new BadRequestError({ message: "prompt is required for video inference" });
1410
+ const created = await this.videos.generate({
1411
+ model: input.model,
1412
+ prompt,
1413
+ provider: input.provider,
1414
+ ...(url(input, "image") ? { image_url: url(input, "image") } : {}),
1415
+ ...attachments,
1416
+ ...extra,
1417
+ }, options);
1418
+ const id = videoid(created.data);
1419
+ const final = input.wait && id
1420
+ ? await this.videos.waitUntilDone(id, {
1421
+ ...options,
1422
+ pollIntervalMs: input.pollIntervalMs,
1423
+ timeoutMs: input.waitTimeoutMs ?? options?.timeoutMs,
1424
+ onStatus: input.onStatus,
1425
+ })
1426
+ : null;
1427
+ return video(plan, created, final);
1428
+ }
1429
+ const body = {
1430
+ model: input.model,
1431
+ input: input.input ?? prompt,
1432
+ modalities: [plan.modality],
1433
+ provider: input.provider,
1434
+ ...attachments,
1435
+ ...extra,
1436
+ };
1437
+ return response(plan, await this.responses.create(body, options));
1438
+ }
973
1439
  }
974
1440
  //# sourceMappingURL=inference.js.map