echo-ai-sdk-ts 2.5.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -36,6 +36,10 @@ __export(index_exports, {
36
36
  AgentIterationLimitError: () => AgentIterationLimitError,
37
37
  AgentPipeline: () => AgentPipeline,
38
38
  AgentRouter: () => AgentRouter,
39
+ AwsBedrockProvider: () => AwsBedrockProvider,
40
+ AwsSageMakerDeployer: () => AwsSageMakerDeployer,
41
+ AzureMlDeployer: () => AzureMlDeployer,
42
+ AzureOpenAiProvider: () => AzureOpenAiProvider,
39
43
  BaseProvider: () => BaseProvider,
40
44
  BaseSTTProvider: () => BaseSTTProvider,
41
45
  BaseSpeakerRecognizer: () => BaseSpeakerRecognizer,
@@ -53,9 +57,10 @@ __export(index_exports, {
53
57
  DEFAULT_REDACTION_RULES: () => DEFAULT_REDACTION_RULES,
54
58
  EchoAI: () => EchoAI,
55
59
  EchoVoice: () => EchoVoice,
56
- ExperimentManager: () => ExperimentManager,
57
60
  FileSessionStore: () => FileSessionStore,
58
61
  GatewayRoutingError: () => GatewayRoutingError,
62
+ GcpVertexManager: () => GcpVertexManager,
63
+ GcpVertexProvider: () => GcpVertexProvider,
59
64
  HandoffManager: () => HandoffManager,
60
65
  HuggingFaceImageGen: () => HuggingFaceImageGen,
61
66
  HuggingFaceTTS: () => HuggingFaceTTS,
@@ -14265,6 +14270,384 @@ var InferenceEndpointManager = class {
14265
14270
  if (!res.ok) throw new Error("Failed to resume endpoint");
14266
14271
  }
14267
14272
  };
14273
+
14274
+ // src/deployment/aws_sagemaker.ts
14275
+ var import_client_sagemaker = require("@aws-sdk/client-sagemaker");
14276
/**
 * Deploys a model to an Amazon SageMaker endpoint and reports endpoint status.
 */
var AwsSageMakerDeployer = class {
  client;
  /**
   * @param {object} config
   * @param {string} config.region - Region handed to the SageMaker client.
   * @param {object} [config.credentials] - Credentials handed to the client unchanged.
   */
  constructor(config) {
    this.client = new import_client_sagemaker.SageMakerClient({
      region: config.region,
      credentials: config.credentials
    });
  }
  /**
   * Orchestrates the 3-step SageMaker deployment process: CreateModel,
   * CreateEndpointConfig, then CreateEndpoint.
   *
   * The method resolves once the three requests have been sent; it does not
   * poll for the endpoint to finish provisioning (see getEndpointStatus).
   * Nothing created by an earlier step is rolled back if a later step fails.
   *
   * @param {object} params
   * @param {string} params.modelName - SageMaker model name; also seeds the config/endpoint names.
   * @param {string} [params.primaryContainerImage] - Container image for the model.
   * @param {string} [params.modelDataUrl] - Location of the model artifacts.
   * @param {string} [params.executionRoleArn] - Role passed as ExecutionRoleArn.
   * @param {number} [params.initialInstanceCount] - Defaults to 1.
   * @param {string} [params.instanceType] - Defaults to "ml.g5.xlarge".
   * @param {string} [params.endpointName] - Defaults to `${modelName}-endpoint`.
   * @param {Object<string, string>} [params.environment] - Extra container environment
   *   variables; entries override the Hugging Face defaults below.
   * @returns {Promise<string>} The endpoint name.
   * @throws {Error} Wrapping the SDK error (available as `cause`).
   */
  async deployEndpoint(params) {
    // The timestamp keeps each endpoint config name unique across redeployments.
    const configName = `${params.modelName}-config-${Date.now()}`;
    const endpointName = params.endpointName ?? `${params.modelName}-endpoint`;
    try {
      await this.client.send(new import_client_sagemaker.CreateModelCommand({
        ModelName: params.modelName,
        PrimaryContainer: {
          Image: params.primaryContainerImage,
          ModelDataUrl: params.modelDataUrl,
          Environment: {
            // HF specific deployment variables if using Hugging Face DLC
            "HF_MODEL_ID": params.modelName,
            "HF_TASK": "text-generation",
            // Caller-supplied variables win over the defaults above.
            ...params.environment
          }
        },
        ExecutionRoleArn: params.executionRoleArn
      }));
      await this.client.send(new import_client_sagemaker.CreateEndpointConfigCommand({
        EndpointConfigName: configName,
        ProductionVariants: [{
          VariantName: "AllTraffic",
          ModelName: params.modelName,
          InitialInstanceCount: params.initialInstanceCount || 1,
          InstanceType: params.instanceType || "ml.g5.xlarge"
        }]
      }));
      await this.client.send(new import_client_sagemaker.CreateEndpointCommand({
        EndpointName: endpointName,
        EndpointConfigName: configName
      }));
      return endpointName;
    } catch (e) {
      // Keep the SDK error reachable so callers can inspect its name/code/metadata.
      throw new Error(`AWS SageMaker deployment failed: ${e.message}`, { cause: e });
    }
  }
  /**
   * Checks the provisioning status of a SageMaker Endpoint.
   *
   * @param {string} endpointName
   * @returns {Promise<string>} The `EndpointStatus` from DescribeEndpoint, or "Unknown" when absent.
   */
  async getEndpointStatus(endpointName) {
    const res = await this.client.send(new import_client_sagemaker.DescribeEndpointCommand({ EndpointName: endpointName }));
    return res.EndpointStatus || "Unknown";
  }
};
14330
+
14331
+ // src/deployment/gcp_vertex_manager.ts
14332
+ var import_aiplatform = require("@google-cloud/aiplatform");
14333
/**
 * Creates a Vertex AI endpoint and deploys an already-uploaded model to it.
 */
var GcpVertexManager = class {
  client;
  project;
  location;
  /**
   * @param {object} config
   * @param {string} config.project - Project id used in the resource parent path.
   * @param {string} config.location - Region; also selects the regional API host.
   */
  constructor(config) {
    this.client = new import_aiplatform.EndpointServiceClient({
      apiEndpoint: `${config.location}-aiplatform.googleapis.com`
    });
    this.project = config.project;
    this.location = config.location;
  }
  /**
   * Deploys an uploaded Vertex AI Model Resource to a newly created Endpoint.
   *
   * Both long-running operations (endpoint creation and model deployment) are
   * awaited before the method resolves. Errors from the client are propagated
   * unchanged.
   *
   * @param {object} params
   * @param {string} params.displayName - Prefix for the endpoint and deployment display names.
   * @param {string} params.modelName - Passed as `deployedModel.model`.
   * @param {string} [params.machineType] - Defaults to "n1-standard-4".
   * @param {string} [params.acceleratorType] - Omitted when falsy.
   * @param {number} [params.acceleratorCount] - Defaults to 0.
   * @returns {Promise<string>} The `name` of the created endpoint.
   */
  async deployModel(params) {
    const parent = `projects/${this.project}/locations/${this.location}`;
    const [endpointLro] = await this.client.createEndpoint({
      parent,
      endpoint: { displayName: `${params.displayName}-endpoint` }
    });
    const [endpointResponse] = await endpointLro.promise();
    const endpointName = endpointResponse.name;
    const [deployLro] = await this.client.deployModel({
      endpoint: endpointName,
      deployedModel: {
        model: params.modelName,
        displayName: `${params.displayName}-deployment`,
        dedicatedResources: {
          machineSpec: {
            machineType: params.machineType || "n1-standard-4",
            acceleratorType: params.acceleratorType || void 0,
            acceleratorCount: params.acceleratorCount || 0
          },
          minReplicaCount: 1,
          maxReplicaCount: 1
        }
      },
      // The endpoint was just created, so it has no traffic split yet. Without
      // this map the deploy request leaves it untouched and the new model would
      // receive no traffic. Key "0" is the API placeholder for "the model being
      // deployed by this request".
      trafficSplit: { "0": 100 }
    });
    console.log("Waiting for Vertex AI deployment LRO to finish...");
    await deployLro.promise();
    return endpointName;
  }
};
14376
+
14377
+ // src/deployment/azure_ml.ts
14378
+ var import_arm_machinelearning = require("@azure/arm-machinelearning");
14379
+ var import_identity = require("@azure/identity");
14380
/**
 * Creates Azure ML managed online endpoints and deployments, and reports
 * endpoint provisioning state.
 */
var AzureMlDeployer = class {
  client;
  config;
  /**
   * @param {object} config
   * @param {string} config.subscriptionId - Passed to the management client.
   * @param {string} config.resourceGroupName
   * @param {string} config.workspaceName
   * @param {string} [config.location] - Region for the endpoint. Defaults to "eastus"
   *   to stay compatible with earlier releases; set it to the workspace's region.
   */
  constructor(config) {
    const credential = new import_identity.DefaultAzureCredential();
    this.client = new import_arm_machinelearning.AzureMachineLearningServicesManagementClient(credential, config.subscriptionId);
    this.config = config;
  }
  /**
   * Orchestrates the creation of an Azure Managed Online Endpoint and Deployment.
   *
   * Steps, each awaited to completion: create/update the endpoint, create/update
   * the deployment under it, then update the endpoint so the deployment receives
   * 100% of traffic. Nothing is rolled back if a later step fails.
   *
   * @param {object} params
   * @param {string} params.endpointName
   * @param {string} params.deploymentName
   * @param {string} params.modelId - Passed as the deployment's `model`.
   * @param {string} [params.instanceType] - Defaults to "Standard_DS3_v2".
   * @param {number} [params.instanceCount] - Clamped to at least 1.
   * @returns {Promise<string>} `params.endpointName`.
   * @throws {Error} Wrapping the SDK error (available as `cause`).
   */
  async deployEndpoint(params) {
    const { resourceGroupName, workspaceName } = this.config;
    const requestedLocation = this.config.location ?? "eastus";
    try {
      // beginCreateOrUpdateAndWait resolves with the final resource, not a poller.
      const endpoint = await this.client.onlineEndpoints.beginCreateOrUpdateAndWait(
        resourceGroupName,
        workspaceName,
        params.endpointName,
        {
          location: requestedLocation,
          properties: {
            authMode: "Key",
            compute: "Managed"
          }
        }
      );
      // Prefer the location echoed by the service; fall back to what was requested.
      const location = endpoint?.location ?? requestedLocation;
      await this.client.onlineDeployments.beginCreateOrUpdateAndWait(
        resourceGroupName,
        workspaceName,
        params.endpointName,
        params.deploymentName,
        {
          location,
          properties: {
            endpointComputeType: "Managed",
            model: params.modelId,
            instanceType: params.instanceType || "Standard_DS3_v2",
            scaleSettings: {
              scaleType: "Default",
              instanceCount: Math.max(1, params.instanceCount || 1)
            }
          }
        }
      );
      // Second endpoint update: route all traffic to the deployment just created.
      await this.client.onlineEndpoints.beginCreateOrUpdateAndWait(
        resourceGroupName,
        workspaceName,
        params.endpointName,
        {
          location,
          properties: {
            authMode: "Key",
            compute: "Managed",
            traffic: {
              [params.deploymentName]: 100
            }
          }
        }
      );
      return params.endpointName;
    } catch (e) {
      // Keep the SDK error reachable so callers can inspect status codes/details.
      throw new Error(`Azure ML Deployment failed: ${e.message}`, { cause: e });
    }
  }
  /**
   * Retrieves the current provisioning state of an Azure ML Endpoint.
   *
   * @param {string} endpointName
   * @returns {Promise<string>} `properties.provisioningState`, or "Unknown" when absent.
   */
  async getEndpointStatus(endpointName) {
    const endpoint = await this.client.onlineEndpoints.get(
      this.config.resourceGroupName,
      this.config.workspaceName,
      endpointName
    );
    return endpoint.properties?.provisioningState || "Unknown";
  }
};
14457
+
14458
+ // src/models/aws_bedrock.ts
14459
+ var import_client_bedrock_runtime = require("@aws-sdk/client-bedrock-runtime");
14460
/**
 * Chat provider that invokes Anthropic Claude models through the Amazon
 * Bedrock runtime InvokeModel API.
 */
var AwsBedrockProvider = class extends BaseProvider {
  client;
  defaultModel;
  /**
   * @param {object} config
   * @param {string} config.region - Region handed to the Bedrock runtime client.
   * @param {object} [config.credentials] - Credentials handed to the client unchanged.
   * @param {string} [config.defaultModel] - Model id used unless the request asks for
   *   the "smart" family; defaults to Claude 3 Haiku.
   */
  constructor(config) {
    super();
    this.client = new import_client_bedrock_runtime.BedrockRuntimeClient({
      region: config.region,
      credentials: config.credentials
    });
    this.defaultModel = config.defaultModel || "anthropic.claude-3-haiku-20240307-v1:0";
  }
  get providerName() {
    return "aws_bedrock";
  }
  /**
   * Sends one non-streaming chat request.
   *
   * Only the first message with role "system" is used as the system prompt;
   * every non-system message is forwarded with its role unchanged.
   *
   * @param {object} request
   * @param {Array<{role: string, content: string}>} request.messages
   * @param {string} [request.model_family] - "smart" selects Claude 3 Sonnet.
   * @param {number} [request.max_tokens] - Defaults to 1024.
   * @param {number} [request.temperature] - Defaults to 0.7; an explicit 0 is honoured.
   * @returns {Promise<object>} `{ content, usage, provider_name, model_name }`.
   * @throws {Error} Wrapping the SDK or parse error (available as `cause`).
   */
  async chatComplete(request) {
    const modelId = request.model_family === "smart" ? "anthropic.claude-3-sonnet-20240229-v1:0" : this.defaultModel;
    const systemMessage = request.messages.find((m) => m.role === "system")?.content;
    const conversation = request.messages.filter((m) => m.role !== "system").map((m) => ({
      role: m.role,
      content: [{ type: "text", text: m.content }]
    }));
    const payload = {
      anthropic_version: "bedrock-2023-05-31",
      max_tokens: request.max_tokens || 1024,
      messages: conversation,
      // `??` rather than `||`: temperature 0 is a legitimate request.
      temperature: request.temperature ?? 0.7
    };
    if (systemMessage) {
      // Omit the field entirely instead of sending an empty system prompt.
      payload.system = systemMessage;
    }
    try {
      const command = new import_client_bedrock_runtime.InvokeModelCommand({
        modelId,
        contentType: "application/json",
        accept: "application/json",
        body: JSON.stringify(payload)
      });
      const response = await this.client.send(command);
      // The response body arrives as bytes; decode before parsing the JSON.
      const responseBody = JSON.parse(new TextDecoder().decode(response.body));
      const inputTokens = responseBody.usage?.input_tokens || 0;
      const outputTokens = responseBody.usage?.output_tokens || 0;
      return {
        content: responseBody.content?.[0]?.text || null,
        usage: {
          prompt_tokens: inputTokens,
          completion_tokens: outputTokens,
          total_tokens: inputTokens + outputTokens
        },
        provider_name: this.providerName,
        model_name: modelId
      };
    } catch (e) {
      throw new Error(`AWS Bedrock invocation failed: ${e.message}`, { cause: e });
    }
  }
  /**
   * Streaming facade: awaits the full completion from chatComplete and yields
   * it as a single chunk (nothing is yielded when the content is empty).
   *
   * @param {object} request - Same shape as for chatComplete.
   */
  async *chatStream(request) {
    const res = await this.chatComplete(request);
    if (res.content) {
      yield res.content;
    }
  }
};
14518
+
14519
+ // src/models/gcp_vertex.ts
14520
+ var import_vertexai = require("@google-cloud/vertexai");
14521
/**
 * Chat provider backed by Gemini models on Google Cloud Vertex AI.
 */
var GcpVertexProvider = class extends BaseProvider {
  client;
  defaultModel;
  /**
   * @param {object} config
   * @param {string} config.project
   * @param {string} config.location
   * @param {string} [config.defaultModel] - Model used unless the request asks for the
   *   "smart" family; defaults to "gemini-1.5-flash-preview-0409".
   */
  constructor(config) {
    super();
    this.client = new import_vertexai.VertexAI({ project: config.project, location: config.location });
    this.defaultModel = config.defaultModel || "gemini-1.5-flash-preview-0409";
  }
  get providerName() {
    return "gcp_vertex";
  }
  /**
   * Builds everything both chat methods need, so streaming and non-streaming
   * requests carry the same model, generation config and system instruction.
   */
  #prepare(request) {
    const model = request.model_family === "smart" ? "gemini-1.5-pro-preview-0409" : this.defaultModel;
    const generativeModel = this.client.getGenerativeModel({
      model,
      generationConfig: {
        maxOutputTokens: request.max_tokens || 1024,
        // `??` rather than `||`: temperature 0 is a legitimate request.
        temperature: request.temperature ?? 0.7
      }
    });
    // Only the first system message is used; it travels separately from `contents`.
    const systemText = request.messages.find((m) => m.role === "system")?.content || void 0;
    const contents = request.messages.filter((m) => m.role !== "system").map((m) => ({
      // "assistant" maps to "model"; every other role is sent as "user".
      role: m.role === "assistant" ? "model" : "user",
      parts: [{ text: m.content }]
    }));
    const payload = {
      contents,
      systemInstruction: systemText ? { role: "system", parts: [{ text: systemText }] } : void 0
    };
    return { model, generativeModel, payload };
  }
  /**
   * Sends one non-streaming chat request.
   *
   * @param {object} request
   * @param {Array<{role: string, content: string}>} request.messages
   * @param {string} [request.model_family] - "smart" selects "gemini-1.5-pro-preview-0409".
   * @param {number} [request.max_tokens] - Defaults to 1024.
   * @param {number} [request.temperature] - Defaults to 0.7; an explicit 0 is honoured.
   * @returns {Promise<object>} `{ content, usage, provider_name, model_name }`; `content`
   *   is the text of the first part of the first candidate, or null.
   * @throws {Error} Wrapping the SDK error (available as `cause`).
   */
  async chatComplete(request) {
    const { model, generativeModel, payload } = this.#prepare(request);
    try {
      const response = await generativeModel.generateContent(payload);
      const responseText = response.response.candidates?.[0]?.content?.parts?.[0]?.text || null;
      return {
        content: responseText,
        usage: {
          prompt_tokens: response.response.usageMetadata?.promptTokenCount || 0,
          completion_tokens: response.response.usageMetadata?.candidatesTokenCount || 0,
          total_tokens: response.response.usageMetadata?.totalTokenCount || 0
        },
        provider_name: this.providerName,
        model_name: model
      };
    } catch (e) {
      throw new Error(`GCP Vertex invocation failed: ${e.message}`, { cause: e });
    }
  }
  /**
   * Streams text chunks for a chat request, using the same model, generation
   * config and system instruction as chatComplete. Chunks without text in the
   * first part of the first candidate are skipped. SDK errors propagate unchanged.
   *
   * @param {object} request - Same shape as for chatComplete.
   */
  async *chatStream(request) {
    const { generativeModel, payload } = this.#prepare(request);
    const stream = await generativeModel.generateContentStream(payload);
    for await (const chunk of stream.stream) {
      const text = chunk.candidates?.[0]?.content?.parts?.[0]?.text;
      if (text) {
        yield text;
      }
    }
  }
};
14581
+
14582
+ // src/models/azure_openai.ts
14583
+ var import_openai6 = require("@azure/openai");
14584
/**
 * Chat provider backed by an Azure OpenAI deployment.
 *
 * NOTE(review): package.json pins "@azure/openai" at ^2.0.0, while
 * `OpenAIClient` / `AzureKeyCredential` are the client exports of the 1.x
 * line of that package — confirm the installed version still exposes them.
 */
var AzureOpenAiProvider = class extends BaseProvider {
  client;
  deploymentName;
  /**
   * @param {object} config
   * @param {string} config.endpoint - Azure OpenAI resource endpoint.
   * @param {string} config.apiKey - Key wrapped in an AzureKeyCredential.
   * @param {string} [config.deploymentName] - Defaults to "gpt-35-turbo".
   */
  constructor(config) {
    super();
    this.client = new import_openai6.OpenAIClient(config.endpoint, new import_openai6.AzureKeyCredential(config.apiKey));
    this.deploymentName = config.deploymentName || "gpt-35-turbo";
  }
  get providerName() {
    return "azure_openai";
  }
  /**
   * Sends one non-streaming chat request to the configured deployment.
   *
   * @param {object} request
   * @param {Array<{role: string, content: string}>} request.messages - Forwarded as role/content pairs.
   * @param {number} [request.max_tokens] - Defaults to 1024.
   * @param {number} [request.temperature] - Defaults to 0.7; an explicit 0 is honoured.
   * @returns {Promise<object>} `{ content, tool_calls, usage, provider_name, model_name }`;
   *   `content` is null when the response carries no message text.
   * @throws {Error} Wrapping the SDK error (available as `cause`).
   */
  async chatComplete(request) {
    try {
      const response = await this.client.getChatCompletions(
        this.deploymentName,
        request.messages.map((m) => ({
          role: m.role,
          content: m.content
        })),
        {
          maxTokens: request.max_tokens || 1024,
          // `??` rather than `||`: temperature 0 is a legitimate request.
          temperature: request.temperature ?? 0.7
        }
      );
      // A response without choices yields null content instead of a TypeError.
      const message = response.choices?.[0]?.message;
      return {
        content: message?.content || null,
        tool_calls: message?.toolCalls ? message.toolCalls.map((tc) => ({
          id: tc.id,
          function: {
            name: tc.function?.name,
            arguments: tc.function?.arguments
          }
        })) : void 0,
        usage: {
          prompt_tokens: response.usage?.promptTokens || 0,
          completion_tokens: response.usage?.completionTokens || 0,
          total_tokens: response.usage?.totalTokens || 0
        },
        provider_name: this.providerName,
        model_name: this.deploymentName
      };
    } catch (e) {
      throw new Error(`Azure OpenAI invocation failed: ${e.message}`, { cause: e });
    }
  }
  /**
   * Streams content deltas for a chat request. Every choice in every chunk is
   * inspected; chunks or choices without content are skipped. SDK errors
   * propagate unchanged.
   *
   * @param {object} request - Same shape as for chatComplete.
   */
  async *chatStream(request) {
    const stream = await this.client.streamChatCompletions(
      this.deploymentName,
      request.messages.map((m) => ({
        role: m.role,
        content: m.content
      })),
      {
        maxTokens: request.max_tokens || 1024,
        temperature: request.temperature ?? 0.7
      }
    );
    for await (const chunk of stream) {
      // Tolerate chunks that carry no `choices` array.
      for (const choice of chunk.choices ?? []) {
        if (choice.delta?.content) {
          yield choice.delta.content;
        }
      }
    }
  }
};
14268
14651
  // Annotate the CommonJS export names for ESM import in node:
14269
14652
  0 && (module.exports = {
14270
14653
  AIModelGateway,
@@ -14273,6 +14656,10 @@ var InferenceEndpointManager = class {
14273
14656
  AgentIterationLimitError,
14274
14657
  AgentPipeline,
14275
14658
  AgentRouter,
14659
+ AwsBedrockProvider,
14660
+ AwsSageMakerDeployer,
14661
+ AzureMlDeployer,
14662
+ AzureOpenAiProvider,
14276
14663
  BaseProvider,
14277
14664
  BaseSTTProvider,
14278
14665
  BaseSpeakerRecognizer,
@@ -14290,9 +14677,10 @@ var InferenceEndpointManager = class {
14290
14677
  DEFAULT_REDACTION_RULES,
14291
14678
  EchoAI,
14292
14679
  EchoVoice,
14293
- ExperimentManager,
14294
14680
  FileSessionStore,
14295
14681
  GatewayRoutingError,
14682
+ GcpVertexManager,
14683
+ GcpVertexProvider,
14296
14684
  HandoffManager,
14297
14685
  HuggingFaceImageGen,
14298
14686
  HuggingFaceTTS,
package/dist/index.mjs CHANGED
@@ -14177,6 +14177,384 @@ var InferenceEndpointManager = class {
14177
14177
  if (!res.ok) throw new Error("Failed to resume endpoint");
14178
14178
  }
14179
14179
  };
14180
+
14181
+ // src/deployment/aws_sagemaker.ts
14182
+ import { SageMakerClient, CreateModelCommand, CreateEndpointConfigCommand, CreateEndpointCommand, DescribeEndpointCommand } from "@aws-sdk/client-sagemaker";
14183
/**
 * Deploys a model to an Amazon SageMaker endpoint and reports endpoint status.
 */
var AwsSageMakerDeployer = class {
  client;
  /**
   * @param {object} config
   * @param {string} config.region - Region handed to the SageMaker client.
   * @param {object} [config.credentials] - Credentials handed to the client unchanged.
   */
  constructor(config) {
    this.client = new SageMakerClient({
      region: config.region,
      credentials: config.credentials
    });
  }
  /**
   * Orchestrates the 3-step SageMaker deployment process: CreateModel,
   * CreateEndpointConfig, then CreateEndpoint.
   *
   * The method resolves once the three requests have been sent; it does not
   * poll for the endpoint to finish provisioning (see getEndpointStatus).
   * Nothing created by an earlier step is rolled back if a later step fails.
   *
   * @param {object} params
   * @param {string} params.modelName - SageMaker model name; also seeds the config/endpoint names.
   * @param {string} [params.primaryContainerImage] - Container image for the model.
   * @param {string} [params.modelDataUrl] - Location of the model artifacts.
   * @param {string} [params.executionRoleArn] - Role passed as ExecutionRoleArn.
   * @param {number} [params.initialInstanceCount] - Defaults to 1.
   * @param {string} [params.instanceType] - Defaults to "ml.g5.xlarge".
   * @param {string} [params.endpointName] - Defaults to `${modelName}-endpoint`.
   * @param {Object<string, string>} [params.environment] - Extra container environment
   *   variables; entries override the Hugging Face defaults below.
   * @returns {Promise<string>} The endpoint name.
   * @throws {Error} Wrapping the SDK error (available as `cause`).
   */
  async deployEndpoint(params) {
    // The timestamp keeps each endpoint config name unique across redeployments.
    const configName = `${params.modelName}-config-${Date.now()}`;
    const endpointName = params.endpointName ?? `${params.modelName}-endpoint`;
    try {
      await this.client.send(new CreateModelCommand({
        ModelName: params.modelName,
        PrimaryContainer: {
          Image: params.primaryContainerImage,
          ModelDataUrl: params.modelDataUrl,
          Environment: {
            // HF specific deployment variables if using Hugging Face DLC
            "HF_MODEL_ID": params.modelName,
            "HF_TASK": "text-generation",
            // Caller-supplied variables win over the defaults above.
            ...params.environment
          }
        },
        ExecutionRoleArn: params.executionRoleArn
      }));
      await this.client.send(new CreateEndpointConfigCommand({
        EndpointConfigName: configName,
        ProductionVariants: [{
          VariantName: "AllTraffic",
          ModelName: params.modelName,
          InitialInstanceCount: params.initialInstanceCount || 1,
          InstanceType: params.instanceType || "ml.g5.xlarge"
        }]
      }));
      await this.client.send(new CreateEndpointCommand({
        EndpointName: endpointName,
        EndpointConfigName: configName
      }));
      return endpointName;
    } catch (e) {
      // Keep the SDK error reachable so callers can inspect its name/code/metadata.
      throw new Error(`AWS SageMaker deployment failed: ${e.message}`, { cause: e });
    }
  }
  /**
   * Checks the provisioning status of a SageMaker Endpoint.
   *
   * @param {string} endpointName
   * @returns {Promise<string>} The `EndpointStatus` from DescribeEndpoint, or "Unknown" when absent.
   */
  async getEndpointStatus(endpointName) {
    const res = await this.client.send(new DescribeEndpointCommand({ EndpointName: endpointName }));
    return res.EndpointStatus || "Unknown";
  }
};
14237
+
14238
+ // src/deployment/gcp_vertex_manager.ts
14239
+ import { EndpointServiceClient } from "@google-cloud/aiplatform";
14240
/**
 * Creates a Vertex AI endpoint and deploys an already-uploaded model to it.
 */
var GcpVertexManager = class {
  client;
  project;
  location;
  /**
   * @param {object} config
   * @param {string} config.project - Project id used in the resource parent path.
   * @param {string} config.location - Region; also selects the regional API host.
   */
  constructor(config) {
    this.client = new EndpointServiceClient({
      apiEndpoint: `${config.location}-aiplatform.googleapis.com`
    });
    this.project = config.project;
    this.location = config.location;
  }
  /**
   * Deploys an uploaded Vertex AI Model Resource to a newly created Endpoint.
   *
   * Both long-running operations (endpoint creation and model deployment) are
   * awaited before the method resolves. Errors from the client are propagated
   * unchanged.
   *
   * @param {object} params
   * @param {string} params.displayName - Prefix for the endpoint and deployment display names.
   * @param {string} params.modelName - Passed as `deployedModel.model`.
   * @param {string} [params.machineType] - Defaults to "n1-standard-4".
   * @param {string} [params.acceleratorType] - Omitted when falsy.
   * @param {number} [params.acceleratorCount] - Defaults to 0.
   * @returns {Promise<string>} The `name` of the created endpoint.
   */
  async deployModel(params) {
    const parent = `projects/${this.project}/locations/${this.location}`;
    const [endpointLro] = await this.client.createEndpoint({
      parent,
      endpoint: { displayName: `${params.displayName}-endpoint` }
    });
    const [endpointResponse] = await endpointLro.promise();
    const endpointName = endpointResponse.name;
    const [deployLro] = await this.client.deployModel({
      endpoint: endpointName,
      deployedModel: {
        model: params.modelName,
        displayName: `${params.displayName}-deployment`,
        dedicatedResources: {
          machineSpec: {
            machineType: params.machineType || "n1-standard-4",
            acceleratorType: params.acceleratorType || void 0,
            acceleratorCount: params.acceleratorCount || 0
          },
          minReplicaCount: 1,
          maxReplicaCount: 1
        }
      },
      // The endpoint was just created, so it has no traffic split yet. Without
      // this map the deploy request leaves it untouched and the new model would
      // receive no traffic. Key "0" is the API placeholder for "the model being
      // deployed by this request".
      trafficSplit: { "0": 100 }
    });
    console.log("Waiting for Vertex AI deployment LRO to finish...");
    await deployLro.promise();
    return endpointName;
  }
};
14283
+
14284
+ // src/deployment/azure_ml.ts
14285
+ import { AzureMachineLearningServicesManagementClient } from "@azure/arm-machinelearning";
14286
+ import { DefaultAzureCredential } from "@azure/identity";
14287
/**
 * Creates Azure ML managed online endpoints and deployments, and reports
 * endpoint provisioning state.
 */
var AzureMlDeployer = class {
  client;
  config;
  /**
   * @param {object} config
   * @param {string} config.subscriptionId - Passed to the management client.
   * @param {string} config.resourceGroupName
   * @param {string} config.workspaceName
   * @param {string} [config.location] - Region for the endpoint. Defaults to "eastus"
   *   to stay compatible with earlier releases; set it to the workspace's region.
   */
  constructor(config) {
    const credential = new DefaultAzureCredential();
    this.client = new AzureMachineLearningServicesManagementClient(credential, config.subscriptionId);
    this.config = config;
  }
  /**
   * Orchestrates the creation of an Azure Managed Online Endpoint and Deployment.
   *
   * Steps, each awaited to completion: create/update the endpoint, create/update
   * the deployment under it, then update the endpoint so the deployment receives
   * 100% of traffic. Nothing is rolled back if a later step fails.
   *
   * @param {object} params
   * @param {string} params.endpointName
   * @param {string} params.deploymentName
   * @param {string} params.modelId - Passed as the deployment's `model`.
   * @param {string} [params.instanceType] - Defaults to "Standard_DS3_v2".
   * @param {number} [params.instanceCount] - Clamped to at least 1.
   * @returns {Promise<string>} `params.endpointName`.
   * @throws {Error} Wrapping the SDK error (available as `cause`).
   */
  async deployEndpoint(params) {
    const { resourceGroupName, workspaceName } = this.config;
    const requestedLocation = this.config.location ?? "eastus";
    try {
      // beginCreateOrUpdateAndWait resolves with the final resource, not a poller.
      const endpoint = await this.client.onlineEndpoints.beginCreateOrUpdateAndWait(
        resourceGroupName,
        workspaceName,
        params.endpointName,
        {
          location: requestedLocation,
          properties: {
            authMode: "Key",
            compute: "Managed"
          }
        }
      );
      // Prefer the location echoed by the service; fall back to what was requested.
      const location = endpoint?.location ?? requestedLocation;
      await this.client.onlineDeployments.beginCreateOrUpdateAndWait(
        resourceGroupName,
        workspaceName,
        params.endpointName,
        params.deploymentName,
        {
          location,
          properties: {
            endpointComputeType: "Managed",
            model: params.modelId,
            instanceType: params.instanceType || "Standard_DS3_v2",
            scaleSettings: {
              scaleType: "Default",
              instanceCount: Math.max(1, params.instanceCount || 1)
            }
          }
        }
      );
      // Second endpoint update: route all traffic to the deployment just created.
      await this.client.onlineEndpoints.beginCreateOrUpdateAndWait(
        resourceGroupName,
        workspaceName,
        params.endpointName,
        {
          location,
          properties: {
            authMode: "Key",
            compute: "Managed",
            traffic: {
              [params.deploymentName]: 100
            }
          }
        }
      );
      return params.endpointName;
    } catch (e) {
      // Keep the SDK error reachable so callers can inspect status codes/details.
      throw new Error(`Azure ML Deployment failed: ${e.message}`, { cause: e });
    }
  }
  /**
   * Retrieves the current provisioning state of an Azure ML Endpoint.
   *
   * @param {string} endpointName
   * @returns {Promise<string>} `properties.provisioningState`, or "Unknown" when absent.
   */
  async getEndpointStatus(endpointName) {
    const endpoint = await this.client.onlineEndpoints.get(
      this.config.resourceGroupName,
      this.config.workspaceName,
      endpointName
    );
    return endpoint.properties?.provisioningState || "Unknown";
  }
};
14364
+
14365
+ // src/models/aws_bedrock.ts
14366
+ import { BedrockRuntimeClient, InvokeModelCommand } from "@aws-sdk/client-bedrock-runtime";
14367
/**
 * Chat provider that invokes Anthropic Claude models through the Amazon
 * Bedrock runtime InvokeModel API.
 */
var AwsBedrockProvider = class extends BaseProvider {
  client;
  defaultModel;
  /**
   * @param {object} config
   * @param {string} config.region - Region handed to the Bedrock runtime client.
   * @param {object} [config.credentials] - Credentials handed to the client unchanged.
   * @param {string} [config.defaultModel] - Model id used unless the request asks for
   *   the "smart" family; defaults to Claude 3 Haiku.
   */
  constructor(config) {
    super();
    this.client = new BedrockRuntimeClient({
      region: config.region,
      credentials: config.credentials
    });
    this.defaultModel = config.defaultModel || "anthropic.claude-3-haiku-20240307-v1:0";
  }
  get providerName() {
    return "aws_bedrock";
  }
  /**
   * Sends one non-streaming chat request.
   *
   * Only the first message with role "system" is used as the system prompt;
   * every non-system message is forwarded with its role unchanged.
   *
   * @param {object} request
   * @param {Array<{role: string, content: string}>} request.messages
   * @param {string} [request.model_family] - "smart" selects Claude 3 Sonnet.
   * @param {number} [request.max_tokens] - Defaults to 1024.
   * @param {number} [request.temperature] - Defaults to 0.7; an explicit 0 is honoured.
   * @returns {Promise<object>} `{ content, usage, provider_name, model_name }`.
   * @throws {Error} Wrapping the SDK or parse error (available as `cause`).
   */
  async chatComplete(request) {
    const modelId = request.model_family === "smart" ? "anthropic.claude-3-sonnet-20240229-v1:0" : this.defaultModel;
    const systemMessage = request.messages.find((m) => m.role === "system")?.content;
    const conversation = request.messages.filter((m) => m.role !== "system").map((m) => ({
      role: m.role,
      content: [{ type: "text", text: m.content }]
    }));
    const payload = {
      anthropic_version: "bedrock-2023-05-31",
      max_tokens: request.max_tokens || 1024,
      messages: conversation,
      // `??` rather than `||`: temperature 0 is a legitimate request.
      temperature: request.temperature ?? 0.7
    };
    if (systemMessage) {
      // Omit the field entirely instead of sending an empty system prompt.
      payload.system = systemMessage;
    }
    try {
      const command = new InvokeModelCommand({
        modelId,
        contentType: "application/json",
        accept: "application/json",
        body: JSON.stringify(payload)
      });
      const response = await this.client.send(command);
      // The response body arrives as bytes; decode before parsing the JSON.
      const responseBody = JSON.parse(new TextDecoder().decode(response.body));
      const inputTokens = responseBody.usage?.input_tokens || 0;
      const outputTokens = responseBody.usage?.output_tokens || 0;
      return {
        content: responseBody.content?.[0]?.text || null,
        usage: {
          prompt_tokens: inputTokens,
          completion_tokens: outputTokens,
          total_tokens: inputTokens + outputTokens
        },
        provider_name: this.providerName,
        model_name: modelId
      };
    } catch (e) {
      throw new Error(`AWS Bedrock invocation failed: ${e.message}`, { cause: e });
    }
  }
  /**
   * Streaming facade: awaits the full completion from chatComplete and yields
   * it as a single chunk (nothing is yielded when the content is empty).
   *
   * @param {object} request - Same shape as for chatComplete.
   */
  async *chatStream(request) {
    const res = await this.chatComplete(request);
    if (res.content) {
      yield res.content;
    }
  }
};
14425
+
14426
+ // src/models/gcp_vertex.ts
14427
+ import { VertexAI } from "@google-cloud/vertexai";
14428
/**
 * Chat provider backed by Gemini models on Google Cloud Vertex AI.
 */
var GcpVertexProvider = class extends BaseProvider {
  client;
  defaultModel;
  /**
   * @param {object} config
   * @param {string} config.project
   * @param {string} config.location
   * @param {string} [config.defaultModel] - Model used unless the request asks for the
   *   "smart" family; defaults to "gemini-1.5-flash-preview-0409".
   */
  constructor(config) {
    super();
    this.client = new VertexAI({ project: config.project, location: config.location });
    this.defaultModel = config.defaultModel || "gemini-1.5-flash-preview-0409";
  }
  get providerName() {
    return "gcp_vertex";
  }
  /**
   * Builds everything both chat methods need, so streaming and non-streaming
   * requests carry the same model, generation config and system instruction.
   */
  #prepare(request) {
    const model = request.model_family === "smart" ? "gemini-1.5-pro-preview-0409" : this.defaultModel;
    const generativeModel = this.client.getGenerativeModel({
      model,
      generationConfig: {
        maxOutputTokens: request.max_tokens || 1024,
        // `??` rather than `||`: temperature 0 is a legitimate request.
        temperature: request.temperature ?? 0.7
      }
    });
    // Only the first system message is used; it travels separately from `contents`.
    const systemText = request.messages.find((m) => m.role === "system")?.content || void 0;
    const contents = request.messages.filter((m) => m.role !== "system").map((m) => ({
      // "assistant" maps to "model"; every other role is sent as "user".
      role: m.role === "assistant" ? "model" : "user",
      parts: [{ text: m.content }]
    }));
    const payload = {
      contents,
      systemInstruction: systemText ? { role: "system", parts: [{ text: systemText }] } : void 0
    };
    return { model, generativeModel, payload };
  }
  /**
   * Sends one non-streaming chat request.
   *
   * @param {object} request
   * @param {Array<{role: string, content: string}>} request.messages
   * @param {string} [request.model_family] - "smart" selects "gemini-1.5-pro-preview-0409".
   * @param {number} [request.max_tokens] - Defaults to 1024.
   * @param {number} [request.temperature] - Defaults to 0.7; an explicit 0 is honoured.
   * @returns {Promise<object>} `{ content, usage, provider_name, model_name }`; `content`
   *   is the text of the first part of the first candidate, or null.
   * @throws {Error} Wrapping the SDK error (available as `cause`).
   */
  async chatComplete(request) {
    const { model, generativeModel, payload } = this.#prepare(request);
    try {
      const response = await generativeModel.generateContent(payload);
      const responseText = response.response.candidates?.[0]?.content?.parts?.[0]?.text || null;
      return {
        content: responseText,
        usage: {
          prompt_tokens: response.response.usageMetadata?.promptTokenCount || 0,
          completion_tokens: response.response.usageMetadata?.candidatesTokenCount || 0,
          total_tokens: response.response.usageMetadata?.totalTokenCount || 0
        },
        provider_name: this.providerName,
        model_name: model
      };
    } catch (e) {
      throw new Error(`GCP Vertex invocation failed: ${e.message}`, { cause: e });
    }
  }
  /**
   * Streams text chunks for a chat request, using the same model, generation
   * config and system instruction as chatComplete. Chunks without text in the
   * first part of the first candidate are skipped. SDK errors propagate unchanged.
   *
   * @param {object} request - Same shape as for chatComplete.
   */
  async *chatStream(request) {
    const { generativeModel, payload } = this.#prepare(request);
    const stream = await generativeModel.generateContentStream(payload);
    for await (const chunk of stream.stream) {
      const text = chunk.candidates?.[0]?.content?.parts?.[0]?.text;
      if (text) {
        yield text;
      }
    }
  }
};
14488
+
14489
+ // src/models/azure_openai.ts
14490
+ import { OpenAIClient, AzureKeyCredential } from "@azure/openai";
14491
/**
 * Chat provider backed by an Azure OpenAI deployment.
 *
 * NOTE(review): package.json pins "@azure/openai" at ^2.0.0, while
 * `OpenAIClient` / `AzureKeyCredential` are the client exports of the 1.x
 * line of that package — confirm the installed version still exposes them.
 */
var AzureOpenAiProvider = class extends BaseProvider {
  client;
  deploymentName;
  /**
   * @param {object} config
   * @param {string} config.endpoint - Azure OpenAI resource endpoint.
   * @param {string} config.apiKey - Key wrapped in an AzureKeyCredential.
   * @param {string} [config.deploymentName] - Defaults to "gpt-35-turbo".
   */
  constructor(config) {
    super();
    this.client = new OpenAIClient(config.endpoint, new AzureKeyCredential(config.apiKey));
    this.deploymentName = config.deploymentName || "gpt-35-turbo";
  }
  get providerName() {
    return "azure_openai";
  }
  /**
   * Sends one non-streaming chat request to the configured deployment.
   *
   * @param {object} request
   * @param {Array<{role: string, content: string}>} request.messages - Forwarded as role/content pairs.
   * @param {number} [request.max_tokens] - Defaults to 1024.
   * @param {number} [request.temperature] - Defaults to 0.7; an explicit 0 is honoured.
   * @returns {Promise<object>} `{ content, tool_calls, usage, provider_name, model_name }`;
   *   `content` is null when the response carries no message text.
   * @throws {Error} Wrapping the SDK error (available as `cause`).
   */
  async chatComplete(request) {
    try {
      const response = await this.client.getChatCompletions(
        this.deploymentName,
        request.messages.map((m) => ({
          role: m.role,
          content: m.content
        })),
        {
          maxTokens: request.max_tokens || 1024,
          // `??` rather than `||`: temperature 0 is a legitimate request.
          temperature: request.temperature ?? 0.7
        }
      );
      // A response without choices yields null content instead of a TypeError.
      const message = response.choices?.[0]?.message;
      return {
        content: message?.content || null,
        tool_calls: message?.toolCalls ? message.toolCalls.map((tc) => ({
          id: tc.id,
          function: {
            name: tc.function?.name,
            arguments: tc.function?.arguments
          }
        })) : void 0,
        usage: {
          prompt_tokens: response.usage?.promptTokens || 0,
          completion_tokens: response.usage?.completionTokens || 0,
          total_tokens: response.usage?.totalTokens || 0
        },
        provider_name: this.providerName,
        model_name: this.deploymentName
      };
    } catch (e) {
      throw new Error(`Azure OpenAI invocation failed: ${e.message}`, { cause: e });
    }
  }
  /**
   * Streams content deltas for a chat request. Every choice in every chunk is
   * inspected; chunks or choices without content are skipped. SDK errors
   * propagate unchanged.
   *
   * @param {object} request - Same shape as for chatComplete.
   */
  async *chatStream(request) {
    const stream = await this.client.streamChatCompletions(
      this.deploymentName,
      request.messages.map((m) => ({
        role: m.role,
        content: m.content
      })),
      {
        maxTokens: request.max_tokens || 1024,
        temperature: request.temperature ?? 0.7
      }
    );
    for await (const chunk of stream) {
      // Tolerate chunks that carry no `choices` array.
      for (const choice of chunk.choices ?? []) {
        if (choice.delta?.content) {
          yield choice.delta.content;
        }
      }
    }
  }
};
14180
14558
  export {
14181
14559
  AIModelGateway,
14182
14560
  APIConnector,
@@ -14184,6 +14562,10 @@ export {
14184
14562
  AgentIterationLimitError,
14185
14563
  AgentPipeline,
14186
14564
  AgentRouter,
14565
+ AwsBedrockProvider,
14566
+ AwsSageMakerDeployer,
14567
+ AzureMlDeployer,
14568
+ AzureOpenAiProvider,
14187
14569
  BaseProvider,
14188
14570
  BaseSTTProvider,
14189
14571
  BaseSpeakerRecognizer,
@@ -14201,9 +14583,10 @@ export {
14201
14583
  DEFAULT_REDACTION_RULES,
14202
14584
  EchoAI,
14203
14585
  EchoVoice,
14204
- ExperimentManager,
14205
14586
  FileSessionStore,
14206
14587
  GatewayRoutingError,
14588
+ GcpVertexManager,
14589
+ GcpVertexProvider,
14207
14590
  HandoffManager,
14208
14591
  HuggingFaceImageGen,
14209
14592
  HuggingFaceTTS,
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "echo-ai-sdk-ts",
3
- "version": "2.5.0",
4
- "description": "Echo AI SDK: Tier 4 Serverless Modality (Hugging Face TTS, Image Generation, Managed Deployments)",
3
+ "version": "2.6.0",
4
+ "description": "Echo AI SDK: Tier 5 Cloud Native (AWS Bedrock/SageMaker, GCP Vertex, Azure OpenAI/ML)",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",
7
7
  "types": "./dist/index.d.ts",
@@ -9,7 +9,7 @@
9
9
  "dist"
10
10
  ],
11
11
  "scripts": {
12
- "build": "tsup src/index.ts --format cjs,esm --dts",
12
+ "build": "tsup src/index.ts --format cjs,esm --external @azure/arm-machinelearning,@azure/identity,@google-cloud/aiplatform,@google-cloud/vertexai,@aws-sdk/client-sagemaker,@aws-sdk/client-bedrock-runtime,@azure/openai",
13
13
  "dev": "tsup src/index.ts --format cjs,esm --watch --dts",
14
14
  "lint": "tsc",
15
15
  "test": "vitest run"
@@ -23,6 +23,13 @@
23
23
  },
24
24
  "devDependencies": {
25
25
  "@anthropic-ai/sdk": "^0.80.0",
26
+ "@aws-sdk/client-bedrock-runtime": "^3.1019.0",
27
+ "@aws-sdk/client-sagemaker": "^3.1019.0",
28
+ "@azure/arm-machinelearning": "^3.0.0",
29
+ "@azure/identity": "^4.13.1",
30
+ "@azure/openai": "^2.0.0",
31
+ "@google-cloud/aiplatform": "^6.5.0",
32
+ "@google-cloud/vertexai": "^1.10.3",
26
33
  "@types/cors": "^2.8.17",
27
34
  "@types/express": "^4.17.21",
28
35
  "@types/node": "^20.11.24",
@@ -31,4 +38,4 @@
31
38
  "typescript": "^5.3.3",
32
39
  "vitest": "^1.3.1"
33
40
  }
34
- }
41
+ }