aws-cdk-neuronx-patterns 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.jsii +754 -117
- package/API.md +1044 -158
- package/README.ja.md +18 -6
- package/README.md +16 -5
- package/lib/base/aws-batch/neuronx-batch-compute-environment.js +1 -1
- package/lib/base/aws-batch/neuronx-batch-ecs-job-definition.js +1 -1
- package/lib/base/aws-batch/neuronx-batch.js +1 -1
- package/lib/base/aws-ecs-patterns/application-load-balanced-neuronx-service.js +4 -4
- package/lib/base/neuronx/calculator.test.js +61 -1
- package/lib/base/neuronx/deep-learning-containers.js +3 -3
- package/lib/base/neuronx/model.js +2 -2
- package/lib/base/neuronx/neuron-optimized-machine-image.js +1 -1
- package/lib/base/neuronx/neuronx-instance-type.d.ts +18 -0
- package/lib/base/neuronx/neuronx-instance-type.js +60 -7
- package/lib/base/neuronx/neuronx-instance-type.test.js +80 -1
- package/lib/base/neuronx-compiler/index.d.ts +3 -1
- package/lib/base/neuronx-compiler/index.js +4 -2
- package/lib/base/neuronx-compiler/{neuronx-compiler.d.ts → neuronx-compiler-base.d.ts} +74 -32
- package/lib/base/neuronx-compiler/neuronx-compiler-base.js +129 -0
- package/lib/base/neuronx-compiler/neuronx-cross-compiler.d.ts +30 -0
- package/lib/base/neuronx-compiler/neuronx-cross-compiler.js +83 -0
- package/lib/base/neuronx-compiler/neuronx-native-compiler.d.ts +18 -0
- package/lib/base/neuronx-compiler/neuronx-native-compiler.js +69 -0
- package/lib/base/server-engine/vllm-engine/vllm-engine-argments.js +1 -1
- package/lib/sagemaker-inference-toolkit-tnx/sagemaker-inference-toolkit-tnx-compiler.js +2 -2
- package/lib/sagemaker-inference-toolkit-tnx/sagemaker-inference-toolkit-tnx-sagemaker.d.ts +1 -1
- package/lib/sagemaker-inference-toolkit-tnx/sagemaker-inference-toolkit-tnx-sagemaker.js +2 -2
- package/lib/vllm-nxd-inference/vllm-nxd-inference-compiler.d.ts +8 -0
- package/lib/vllm-nxd-inference/vllm-nxd-inference-compiler.js +32 -4
- package/lib/vllm-nxd-inference/vllm-nxd-inference-ecs-patterns.js +6 -6
- package/package.json +7 -7
- package/scripts/compile/vllm-nxd-inference/Dockerfile +5 -0
- package/scripts/compile/vllm-nxd-inference/entrypoint.sh +39 -14
- package/lib/base/neuronx-compiler/neuronx-compiler.js +0 -166
package/.jsii
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
},
|
|
9
9
|
"dependencies": {
|
|
10
10
|
"@aws-cdk/aws-sagemaker-alpha": "2.240.0-alpha.0",
|
|
11
|
-
"@cdklabs/deploy-time-build": "^0.0
|
|
11
|
+
"@cdklabs/deploy-time-build": "^0.1.0",
|
|
12
12
|
"aws-cdk-lib": "^2.240.0",
|
|
13
13
|
"constructs": "^10.5.1"
|
|
14
14
|
},
|
|
@@ -8555,7 +8555,7 @@
|
|
|
8555
8555
|
"stability": "stable"
|
|
8556
8556
|
},
|
|
8557
8557
|
"homepage": "https://github.com/WinterYukky/aws-cdk-neuronx-patterns.git",
|
|
8558
|
-
"jsiiVersion": "5.9.
|
|
8558
|
+
"jsiiVersion": "5.9.32 (build ac92fbd)",
|
|
8559
8559
|
"keywords": [
|
|
8560
8560
|
"cdk",
|
|
8561
8561
|
"neuronx"
|
|
@@ -8571,7 +8571,7 @@
|
|
|
8571
8571
|
},
|
|
8572
8572
|
"name": "aws-cdk-neuronx-patterns",
|
|
8573
8573
|
"readme": {
|
|
8574
|
-
"markdown": "# Neuronx patterns Construct Library\n\n> [!WARNING]\n> This library is experimental module.\n\nThis library provides high-level architectural patterns using AWS Neuronx (e.g. Inferentia2 and Trainium1). It contains:\n\n- vLLM with NxD Inference on ALB & ECS on EC2\n- Neuronx Compiler\n\n[日本語版 README はこちら](./README.ja.md)\n\n## Table of Contents\n\n- [Installation](#installation)\n- [Quick Start](#quick-start)\n- [vLLM NxD Inference on ALB & ECS on EC2](#vllm-nxd-inference-on-alb--ecs-on-ec2)\n - [Architecture](#architecture)\n - [Basic Usage](#basic-usage)\n - [Complete Example](#complete-example)\n - [Using Specific Official AWS Neuron vLLM Image Version](#using-specific-official-aws-neuron-vllm-image-version)\n - [Using HuggingFace Token with Secrets](#using-huggingface-token-with-secrets)\n- [Neuronx Compiler](#neuronx-compiler)\n - [Spot Instance](#spot-instance)\n- [API Reference](#api-reference)\n- [Cost Considerations](#cost-considerations)\n- [Troubleshooting](#troubleshooting)\n- [Security Best Practices](#security-best-practices)\n- [License](#license)\n\n## Installation\n\n```bash\n# NPM\nnpm i aws-cdk-neuronx-patterns\n\n# yarn\nyarn add aws-cdk-neuronx-patterns\n\n# PNPM\npnpm i aws-cdk-neuronx-patterns\n```\n\n## Quick Start\n\nHere's a minimal example to deploy a vLLM inference service:\n\n```ts\nimport * as cdk from \"aws-cdk-lib\";\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport {\n VllmNxdInferenceCompiler,\n VllmNxdInferenceTaskDefinition,\n ApplicationLoadBalancedVllmNxDInferenceService,\n Model,\n} from \"aws-cdk-neuronx-patterns\";\n\nconst app = new cdk.App();\nconst stack = new cdk.Stack(app, \"VllmInferenceStack\");\n\nconst vpc = new ec2.Vpc(stack, \"Vpc\", { maxAzs: 2 });\nconst bucket = new s3.Bucket(stack, \"ModelBucket\");\n\nconst compiler = new VllmNxdInferenceCompiler(stack, \"Compiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"HuggingFaceTB/SmolLM-135M-Instruct\"),\n});\n\nconst compiledModel = compiler.compile();\nconst taskDefinition = new VllmNxdInferenceTaskDefinition(stack, \"TaskDef\", {\n compiledModel,\n});\n\nconst service = new ApplicationLoadBalancedVllmNxDInferenceService(\n stack,\n \"Service\",\n { vpc, taskDefinition }\n);\n\nnew cdk.CfnOutput(stack, \"LoadBalancerDNS\", {\n value: service.loadBalancer.loadBalancerDnsName,\n});\n```\n\n## vLLM NxD Inference on ALB & ECS on EC2\n\n> [!WARNING]\n> This construct uses an Inferentia2 instance on EC2. You may need to increase your service quota for Inferentia2 instances in your AWS account via the [Service Quotas console](https://console.aws.amazon.com/servicequotas/).\n\nThis pattern combines `VllmNxdInferenceCompiler` for model compilation and `ApplicationLoadBalancedVllmNxDInferenceService` for deployment. Models published on HuggingFace can be easily compiled and deployed to ECS with Application Load Balancer.\n\n### Architecture\n\n\n\nThe construct automatically:\n\n- Calculates optimal tensor parallelism based on model size\n- Configures memory footprint for the ECS tasks\n- Sets up the Application Load Balancer with health checks\n- Deploys the compiled model to ECS tasks\n- Configures auto-scaling policies\n\nThe service exposes a REST API endpoint through the Application Load Balancer that can be used to perform inference with the deployed model.\n\n### Basic Usage\n\n```ts\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport {\n VllmNxdInferenceCompiler,\n VllmNxdInferenceTaskDefinition,\n ApplicationLoadBalancedVllmNxDInferenceService,\n Model,\n} from \"aws-cdk-neuronx-patterns\";\n\ndeclare const vpc: ec2.Vpc;\ndeclare const bucket: s3.Bucket;\n\nconst compiler = new VllmNxdInferenceCompiler(this, \"Compiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"HuggingFaceTB/SmolLM-135M-Instruct\"),\n});\n\nconst compiledModel = compiler.compile();\nconst taskDefinition = new VllmNxdInferenceTaskDefinition(\n this,\n \"TaskDefinition\",\n {\n compiledModel,\n }\n);\n\nconst service = new ApplicationLoadBalancedVllmNxDInferenceService(\n this,\n \"Service\",\n {\n vpc,\n taskDefinition,\n }\n);\n```\n\n### Complete Example\n\nHere's a complete example with VPC and S3 bucket creation, including access from other ECS tasks:\n\n```ts\nimport * as cdk from \"aws-cdk-lib\";\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as ecs from \"aws-cdk-lib/aws-ecs\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport {\n VllmNxdInferenceCompiler,\n VllmNxdInferenceTaskDefinition,\n ApplicationLoadBalancedVllmNxDInferenceService,\n Model,\n} from \"aws-cdk-neuronx-patterns\";\n\nexport class MyVllmStack extends cdk.Stack {\n constructor(scope: cdk.App, id: string, props?: cdk.StackProps) {\n super(scope, id, props);\n\n // Create VPC\n const vpc = new ec2.Vpc(this, \"Vpc\", {\n maxAzs: 2,\n natGateways: 1,\n });\n\n // Create S3 bucket for compiled models\n const bucket = new s3.Bucket(this, \"ModelBucket\", {\n removalPolicy: cdk.RemovalPolicy.DESTROY,\n autoDeleteObjects: true,\n });\n\n // Compile the model\n const compiler = new VllmNxdInferenceCompiler(this, \"Compiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"HuggingFaceTB/SmolLM-135M-Instruct\"),\n });\n\n const compiledModel = compiler.compile();\n\n // Create task definition\n const taskDefinition = new VllmNxdInferenceTaskDefinition(\n this,\n \"TaskDefinition\",\n {\n compiledModel,\n }\n );\n\n // Deploy service with ALB\n const service = new ApplicationLoadBalancedVllmNxDInferenceService(\n this,\n \"Service\",\n {\n vpc,\n taskDefinition,\n }\n );\n\n // Allow access from other ECS tasks\n const cluster = new ecs.Cluster(this, \"AppCluster\", { vpc });\n const appTaskDefinition = new ecs.FargateTaskDefinition(\n this,\n \"AppTaskDefinition\"\n );\n appTaskDefinition.addContainer(\"app\", {\n image: ecs.ContainerImage.fromRegistry(\"amazon/amazon-ecs-sample\"),\n logging: ecs.LogDrivers.awsLogs({ streamPrefix: \"app\" }),\n });\n\n const appService = new ecs.FargateService(this, \"AppService\", {\n cluster,\n taskDefinition: appTaskDefinition,\n });\n\n // Allow application service to access inference service\n service.service.connections.allowFrom(\n appService,\n ec2.Port.tcp(8000),\n \"Allow access from application service\"\n );\n\n // Output the load balancer URL\n new cdk.CfnOutput(this, \"LoadBalancerURL\", {\n value: `http://${service.loadBalancer.loadBalancerDnsName}`,\n description: \"Load Balancer URL for inference endpoint\",\n });\n }\n}\n```\n\n### Using Specific Official AWS Neuron vLLM Image Version\n\nThis library supports the official AWS Neuron Deep Learning Containers for vLLM inference. You can use the `VllmInferenceNeuronxImage` class to reference these images and `VllmNxdInferenceImage.fromNeuronSdkVersion` to create a compatible image object:\n\n```typescript\nimport { VllmNxdInferenceImage, VllmInferenceNeuronxImage } from \"aws-cdk-neuronx-patterns\";\n\n// Use the official vLLM Neuron Image\nconst vllmImage = VllmNxdInferenceImage.fromNeuronSdkVersion(\n VllmInferenceNeuronxImage.SDK_2_26_0\n);\n\n// Use with task definition\nconst taskDefinition = new VllmNxdInferenceTaskDefinition(\n this,\n \"TaskDefinition\",\n {\n compiledModel,\n image: vllmImage, // Default is using latest official vLLM Neuron Image\n }\n);\n```\n\n### Using HuggingFace Token with Secrets\n\nWhen working with private or gated models on HuggingFace, you need to provide an authentication token. For security best practices, store your HuggingFace token in AWS Secrets Manager and pass it to both the compiler and inference environments:\n\n```ts\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport * as batch from \"aws-cdk-lib/aws-batch\";\nimport { Secret } from \"aws-cdk-lib/aws-secretsmanager\";\nimport {\n VllmNxdInferenceCompiler,\n VllmNxdInferenceTaskDefinition,\n ApplicationLoadBalancedVllmNxDInferenceService,\n Model,\n} from \"aws-cdk-neuronx-patterns\";\n\ndeclare const vpc: ec2.Vpc;\ndeclare const bucket: s3.Bucket;\n\n// Reference an existing secret containing your HuggingFace token\nconst hfTokenSecret = Secret.fromSecretNameV2(\n this,\n \"HFTokenSecret\",\n \"my-huggingface-token\"\n);\nconst hfToken = batch.Secret.fromSecretsManager(hfTokenSecret, \"readonlyToken\");\n\n// Pass the secret to the compiler\nconst compiler = new VllmNxdInferenceCompiler(this, \"Compiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"meta-llama/Meta-Llama-3-8B\"),\n vllmArgs: {\n hfToken, // Pass the HF token secret here\n },\n});\n\nconst compiledModel = compiler.compile();\nconst taskDefinition = new VllmNxdInferenceTaskDefinition(\n this,\n \"TaskDefinition\",\n {\n compiledModel,\n }\n);\n\nconst service = new ApplicationLoadBalancedVllmNxDInferenceService(\n this,\n \"Service\",\n {\n vpc,\n taskDefinition,\n }\n);\n```\n\nThe secret will be securely passed as an environment variable to the compilation batch job and the ECS tasks running the inference server.\n\n## Neuronx Compiler\n\n> [!WARNING]\n> This construct uses an Inferentia2 instance on EC2. You may need to increase your service quota for Inferentia2 instances in your AWS account.\n\nThis construct compiles models supported by Neuronx and uploads them to the specified S3 bucket. The construct automatically selects the required instance type based on the number of model parameters.\n\n\n\n```ts\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport { NeuronxCompiler, Model } from \"aws-cdk-neuronx-patterns\";\n\ndeclare const vpc: ec2.Vpc;\ndeclare const bucket: s3.Bucket;\ndeclare const image: INeuronxContainerImage;\n\nconst compiler = new NeuronxCompiler(this, \"NeuronxCompiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"HuggingFaceTB/SmolLM-135M-Instruct\"),\n artifactS3Prefix: \"my-compiled-artifacts\",\n image,\n});\n\nconst compiledModel = compiler.compile();\n\n// Get the compiled artifacts from this S3 URL\nnew cdk.CfnOutput(this, \"CompiledArtifact\", {\n value: compiledModel.s3Url,\n});\n```\n\n### Spot Instance\n\n> [!WARNING]\n> If you use Spot Instances, verify that your service quota for Spot instances has been increased.\n\nYou can reduce costs by using Spot Instances for compilation:\n\n```ts\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport { NeuronxCompiler, Model } from \"aws-cdk-neuronx-patterns\";\n\ndeclare const vpc: ec2.Vpc;\ndeclare const bucket: s3.Bucket;\ndeclare const image: INeuronxContainerImage;\n\nnew NeuronxCompiler(this, \"NeuronxCompiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"HuggingFaceTB/SmolLM-135M-Instruct\"),\n artifactS3Prefix: \"my-compiled-artifacts\",\n image,\n spot: true, // Enable Spot Instances\n});\n```\n\n## API Reference\n\nFor detailed API documentation, see [API.md](./API.md).\n\n## Cost Considerations\n\n> [!IMPORTANT]\n> This library deploys AWS resources that incur costs:\n> - **Inferentia2 instances** (EC2) - Significant hourly costs\n> - **Application Load Balancer** - Hourly and data processing charges\n> - **NAT Gateway** - Hourly and data processing charges\n> - **S3 storage** - Storage and request charges\n> - **Data transfer** - Charges for data transfer out\n\nFor cost estimates, use the [AWS Pricing Calculator](https://calculator.aws).\n\n**Cost optimization tips:**\n- Use Spot Instances for compilation jobs (can save up to 90%)\n- Delete resources when not in use (`cdk destroy`)\n- Use appropriate instance sizes for your workload\n- Monitor usage with AWS Cost Explorer\n\n## Troubleshooting\n\n### Common Issues\n\n**Issue: \"Service quota exceeded for Inferentia2 instances\"**\n- Solution: Request a quota increase via the [Service Quotas console](https://console.aws.amazon.com/servicequotas/)\n- Navigate to: EC2 → Running On-Demand Inf instances\n\n**Issue: \"Compilation job fails\"**\n- Check AWS Batch job logs in CloudWatch Logs\n- Verify the model exists on HuggingFace\n- Ensure sufficient disk space and memory for the model size\n\n**Issue: \"ECS tasks fail to start\"**\n- Check ECS task logs in CloudWatch\n- Verify S3 bucket permissions\n- Ensure the compiled model exists in S3\n\n**Issue: \"Health check failures\"**\n- Increase health check grace period\n- Verify security group rules allow ALB to reach ECS tasks\n- Check container logs for startup errors\n\n### Debugging\n\nView logs in CloudWatch:\n```bash\n# Batch job logs\naws logs tail /aws/batch/job --follow\n\n# ECS task logs\naws logs tail /ecs/vllm-inference --follow\n```\n\n## Security Best Practices\n\n- **Secrets Management**: Always use AWS Secrets Manager for sensitive data (HuggingFace tokens, API keys)\n- **IAM Roles**: Follow the principle of least privilege for IAM roles\n- **VPC Configuration**:\n - Deploy ECS tasks in private subnets\n - Use security groups to restrict traffic\n - Enable VPC Flow Logs for monitoring\n- **S3 Buckets**:\n - Enable encryption at rest\n - Use bucket policies to restrict access\n - Enable versioning for compiled models\n- **ALB**:\n - Use HTTPS with ACM certificates in production\n - Enable access logs for auditing\n\n## Contributing\n\nContributions are welcome! Please feel free to submit a Pull Request.\n\n## License\n\nThis library is licensed under the Apache-2.0 License. See the [LICENSE](./LICENSE) file.\n"
|
|
8574
|
+
"markdown": "# Neuronx patterns Construct Library\n\n> [!WARNING]\n> This library is experimental module.\n\nThis library provides high-level architectural patterns using AWS Neuronx (e.g. Inferentia2 and Trainium1). It contains:\n\n- vLLM with NxD Inference on ALB & ECS on EC2\n- Neuronx Compiler\n\n[日本語版 README はこちら](./README.ja.md)\n\n## Table of Contents\n\n- [Installation](#installation)\n- [Quick Start](#quick-start)\n- [vLLM NxD Inference on ALB & ECS on EC2](#vllm-nxd-inference-on-alb--ecs-on-ec2)\n - [Architecture](#architecture)\n - [Basic Usage](#basic-usage)\n - [Complete Example](#complete-example)\n - [Using Specific Official AWS Neuron vLLM Image Version](#using-specific-official-aws-neuron-vllm-image-version)\n - [Using HuggingFace Token with Secrets](#using-huggingface-token-with-secrets)\n- [Neuronx Compiler](#neuronx-compiler)\n - [Spot Instance](#spot-instance)\n- [API Reference](#api-reference)\n- [Cost Considerations](#cost-considerations)\n- [Troubleshooting](#troubleshooting)\n- [Security Best Practices](#security-best-practices)\n- [License](#license)\n\n## Installation\n\n```bash\n# NPM\nnpm i aws-cdk-neuronx-patterns\n\n# yarn\nyarn add aws-cdk-neuronx-patterns\n\n# PNPM\npnpm i aws-cdk-neuronx-patterns\n```\n\n## Quick Start\n\nHere's a minimal example to deploy a vLLM inference service:\n\n```ts\nimport * as cdk from \"aws-cdk-lib\";\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport {\n VllmNxdInferenceCompiler,\n VllmNxdInferenceTaskDefinition,\n ApplicationLoadBalancedVllmNxDInferenceService,\n Model,\n} from \"aws-cdk-neuronx-patterns\";\n\nconst app = new cdk.App();\nconst stack = new cdk.Stack(app, \"VllmInferenceStack\");\n\nconst vpc = new ec2.Vpc(stack, \"Vpc\", { maxAzs: 2 });\nconst bucket = new s3.Bucket(stack, \"ModelBucket\");\n\nconst compiler = new VllmNxdInferenceCompiler(stack, \"Compiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"HuggingFaceTB/SmolLM-135M-Instruct\"),\n});\n\nconst compiledModel = compiler.compile();\nconst taskDefinition = new VllmNxdInferenceTaskDefinition(stack, \"TaskDef\", {\n compiledModel,\n});\n\nconst service = new ApplicationLoadBalancedVllmNxDInferenceService(\n stack,\n \"Service\",\n { vpc, taskDefinition }\n);\n\nnew cdk.CfnOutput(stack, \"LoadBalancerDNS\", {\n value: service.loadBalancer.loadBalancerDnsName,\n});\n```\n\n## vLLM NxD Inference on ALB & ECS on EC2\n\n> [!WARNING]\n> This construct uses an Inferentia2 instance on EC2 for inference. You may need to increase your service quota for Inferentia2 instances in your AWS account via the [Service Quotas console](https://console.aws.amazon.com/servicequotas/).\n\n> [!NOTE]\n> Model compilation is performed on standard (non-Neuron) EC2 instances via cross-compilation, so no Inferentia/Trainium quota is needed for the compilation phase.\n\nThis pattern combines `VllmNxdInferenceCompiler` for model compilation and `ApplicationLoadBalancedVllmNxDInferenceService` for deployment. Models published on HuggingFace can be easily compiled and deployed to ECS with Application Load Balancer.\n\n### Architecture\n\n\n\nThe construct automatically:\n\n- Calculates optimal tensor parallelism based on model size\n- Configures memory footprint for the ECS tasks\n- Sets up the Application Load Balancer with health checks\n- Deploys the compiled model to ECS tasks\n- Configures auto-scaling policies\n\nThe service exposes a REST API endpoint through the Application Load Balancer that can be used to perform inference with the deployed model.\n\n### Basic Usage\n\n```ts\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport {\n VllmNxdInferenceCompiler,\n VllmNxdInferenceTaskDefinition,\n ApplicationLoadBalancedVllmNxDInferenceService,\n Model,\n} from \"aws-cdk-neuronx-patterns\";\n\ndeclare const vpc: ec2.Vpc;\ndeclare const bucket: s3.Bucket;\n\nconst compiler = new VllmNxdInferenceCompiler(this, \"Compiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"HuggingFaceTB/SmolLM-135M-Instruct\"),\n});\n\nconst compiledModel = compiler.compile();\nconst taskDefinition = new VllmNxdInferenceTaskDefinition(\n this,\n \"TaskDefinition\",\n {\n compiledModel,\n }\n);\n\nconst service = new ApplicationLoadBalancedVllmNxDInferenceService(\n this,\n \"Service\",\n {\n vpc,\n taskDefinition,\n }\n);\n```\n\n### Complete Example\n\nHere's a complete example with VPC and S3 bucket creation, including access from other ECS tasks:\n\n```ts\nimport * as cdk from \"aws-cdk-lib\";\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as ecs from \"aws-cdk-lib/aws-ecs\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport {\n VllmNxdInferenceCompiler,\n VllmNxdInferenceTaskDefinition,\n ApplicationLoadBalancedVllmNxDInferenceService,\n Model,\n} from \"aws-cdk-neuronx-patterns\";\n\nexport class MyVllmStack extends cdk.Stack {\n constructor(scope: cdk.App, id: string, props?: cdk.StackProps) {\n super(scope, id, props);\n\n // Create VPC\n const vpc = new ec2.Vpc(this, \"Vpc\", {\n maxAzs: 2,\n natGateways: 1,\n });\n\n // Create S3 bucket for compiled models\n const bucket = new s3.Bucket(this, \"ModelBucket\", {\n removalPolicy: cdk.RemovalPolicy.DESTROY,\n autoDeleteObjects: true,\n });\n\n // Compile the model\n const compiler = new VllmNxdInferenceCompiler(this, \"Compiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"HuggingFaceTB/SmolLM-135M-Instruct\"),\n });\n\n const compiledModel = compiler.compile();\n\n // Create task definition\n const taskDefinition = new VllmNxdInferenceTaskDefinition(\n this,\n \"TaskDefinition\",\n {\n compiledModel,\n }\n );\n\n // Deploy service with ALB\n const service = new ApplicationLoadBalancedVllmNxDInferenceService(\n this,\n \"Service\",\n {\n vpc,\n taskDefinition,\n }\n );\n\n // Allow access from other ECS tasks\n const cluster = new ecs.Cluster(this, \"AppCluster\", { vpc });\n const appTaskDefinition = new ecs.FargateTaskDefinition(\n this,\n \"AppTaskDefinition\"\n );\n appTaskDefinition.addContainer(\"app\", {\n image: ecs.ContainerImage.fromRegistry(\"amazon/amazon-ecs-sample\"),\n logging: ecs.LogDrivers.awsLogs({ streamPrefix: \"app\" }),\n });\n\n const appService = new ecs.FargateService(this, \"AppService\", {\n cluster,\n taskDefinition: appTaskDefinition,\n });\n\n // Allow application service to access inference service\n service.service.connections.allowFrom(\n appService,\n ec2.Port.tcp(8000),\n \"Allow access from application service\"\n );\n\n // Output the load balancer URL\n new cdk.CfnOutput(this, \"LoadBalancerURL\", {\n value: `http://${service.loadBalancer.loadBalancerDnsName}`,\n description: \"Load Balancer URL for inference endpoint\",\n });\n }\n}\n```\n\n### Using Specific Official AWS Neuron vLLM Image Version\n\nThis library supports the official AWS Neuron Deep Learning Containers for vLLM inference. You can use the `VllmInferenceNeuronxImage` class to reference these images and `VllmNxdInferenceImage.fromNeuronSdkVersion` to create a compatible image object:\n\n```typescript\nimport { VllmNxdInferenceImage, VllmInferenceNeuronxImage } from \"aws-cdk-neuronx-patterns\";\n\n// Use the official vLLM Neuron Image\nconst vllmImage = VllmNxdInferenceImage.fromNeuronSdkVersion(\n VllmInferenceNeuronxImage.SDK_2_26_0\n);\n\n// Use with task definition\nconst taskDefinition = new VllmNxdInferenceTaskDefinition(\n this,\n \"TaskDefinition\",\n {\n compiledModel,\n image: vllmImage, // Default is using latest official vLLM Neuron Image\n }\n);\n```\n\n### Using HuggingFace Token with Secrets\n\nWhen working with private or gated models on HuggingFace, you need to provide an authentication token. For security best practices, store your HuggingFace token in AWS Secrets Manager and pass it to both the compiler and inference environments:\n\n```ts\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport * as batch from \"aws-cdk-lib/aws-batch\";\nimport { Secret } from \"aws-cdk-lib/aws-secretsmanager\";\nimport {\n VllmNxdInferenceCompiler,\n VllmNxdInferenceTaskDefinition,\n ApplicationLoadBalancedVllmNxDInferenceService,\n Model,\n} from \"aws-cdk-neuronx-patterns\";\n\ndeclare const vpc: ec2.Vpc;\ndeclare const bucket: s3.Bucket;\n\n// Reference an existing secret containing your HuggingFace token\nconst hfTokenSecret = Secret.fromSecretNameV2(\n this,\n \"HFTokenSecret\",\n \"my-huggingface-token\"\n);\nconst hfToken = batch.Secret.fromSecretsManager(hfTokenSecret, \"readonlyToken\");\n\n// Pass the secret to the compiler\nconst compiler = new VllmNxdInferenceCompiler(this, \"Compiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"meta-llama/Meta-Llama-3-8B\"),\n vllmArgs: {\n hfToken, // Pass the HF token secret here\n },\n});\n\nconst compiledModel = compiler.compile();\nconst taskDefinition = new VllmNxdInferenceTaskDefinition(\n this,\n \"TaskDefinition\",\n {\n compiledModel,\n }\n);\n\nconst service = new ApplicationLoadBalancedVllmNxDInferenceService(\n this,\n \"Service\",\n {\n vpc,\n taskDefinition,\n }\n);\n```\n\nThe secret will be securely passed as an environment variable to the compilation batch job and the ECS tasks running the inference server.\n\n## Neuronx Compiler\n\n> [!WARNING]\n> This construct uses an Inferentia2 instance on EC2. You may need to increase your service quota for Inferentia2 instances in your AWS account.\n\nThis construct compiles models supported by Neuronx and uploads them to the specified S3 bucket. The construct automatically selects the required instance type based on the number of model parameters.\n\nThere are two compiler variants:\n\n- **`NeuronxNativeCompiler`** — Compiles on Neuron instances (Inferentia2/Trainium). Requires Neuron device quota.\n- **`NeuronxCrossCompiler`** — Compiles on standard EC2 instances (e.g., `c7i-flex.4xlarge`) without Neuron hardware. Used by `VllmNxdInferenceCompiler` by default.\n\nBoth implement the `INeuronxCompiler` interface and produce compatible artifacts.\n\n\n\n```ts\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport { NeuronxNativeCompiler, Model } from \"aws-cdk-neuronx-patterns\";\n\ndeclare const vpc: ec2.Vpc;\ndeclare const bucket: s3.Bucket;\ndeclare const image: INeuronxContainerImage;\n\nconst compiler = new NeuronxNativeCompiler(this, \"NeuronxCompiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"HuggingFaceTB/SmolLM-135M-Instruct\"),\n artifactS3Prefix: \"my-compiled-artifacts\",\n image,\n});\n\nconst compiledModel = compiler.compile();\n\n// Get the compiled artifacts from this S3 URL\nnew cdk.CfnOutput(this, \"CompiledArtifact\", {\n value: compiledModel.s3Url,\n});\n```\n\n### Spot Instance\n\n> [!WARNING]\n> If you use Spot Instances, verify that your service quota for Spot instances has been increased.\n\nYou can reduce costs by using Spot Instances for compilation:\n\n```ts\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport * as s3 from \"aws-cdk-lib/aws-s3\";\nimport { NeuronxNativeCompiler, Model } from \"aws-cdk-neuronx-patterns\";\n\ndeclare const vpc: ec2.Vpc;\ndeclare const bucket: s3.Bucket;\ndeclare const image: INeuronxContainerImage;\n\nnew NeuronxNativeCompiler(this, \"NeuronxCompiler\", {\n vpc,\n bucket,\n model: Model.fromHuggingFace(\"HuggingFaceTB/SmolLM-135M-Instruct\"),\n artifactS3Prefix: \"my-compiled-artifacts\",\n image,\n spot: true, // Enable Spot Instances\n});\n```\n\n## API Reference\n\nFor detailed API documentation, see [API.md](./API.md).\n\n## Cost Considerations\n\n> [!IMPORTANT]\n> This library deploys AWS resources that incur costs:\n> - **Inferentia2 instances** (EC2) - Significant hourly costs\n> - **Application Load Balancer** - Hourly and data processing charges\n> - **NAT Gateway** - Hourly and data processing charges\n> - **S3 storage** - Storage and request charges\n> - **Data transfer** - Charges for data transfer out\n\nFor cost estimates, use the [AWS Pricing Calculator](https://calculator.aws).\n\n**Cost optimization tips:**\n- The `VllmNxdInferenceCompiler` uses cross-compilation on standard EC2 instances by default, avoiding expensive Neuron instances during compilation\n- Use Spot Instances for compilation jobs (can save up to 90%)\n- Delete resources when not in use (`cdk destroy`)\n- Use appropriate instance sizes for your workload\n- Monitor usage with AWS Cost Explorer\n\n## Troubleshooting\n\n### Common Issues\n\n**Issue: \"Service quota exceeded for Inferentia2 instances\"**\n- Solution: Request a quota increase via the [Service Quotas console](https://console.aws.amazon.com/servicequotas/)\n- Navigate to: EC2 → Running On-Demand Inf instances\n\n**Issue: \"Compilation job fails\"**\n- Check AWS Batch job logs in CloudWatch Logs\n- Verify the model exists on HuggingFace\n- Ensure sufficient disk space and memory for the model size\n\n**Issue: \"ECS tasks fail to start\"**\n- Check ECS task logs in CloudWatch\n- Verify S3 bucket permissions\n- Ensure the compiled model exists in S3\n\n**Issue: \"Health check failures\"**\n- Increase health check grace period\n- Verify security group rules allow ALB to reach ECS tasks\n- Check container logs for startup errors\n\n### Debugging\n\nView logs in CloudWatch:\n```bash\n# Batch job logs\naws logs tail /aws/batch/job --follow\n\n# ECS task logs\naws logs tail /ecs/vllm-inference --follow\n```\n\n## Security Best Practices\n\n- **Secrets Management**: Always use AWS Secrets Manager for sensitive data (HuggingFace tokens, API keys)\n- **IAM Roles**: Follow the principle of least privilege for IAM roles\n- **VPC Configuration**:\n - Deploy ECS tasks in private subnets\n - Use security groups to restrict traffic\n - Enable VPC Flow Logs for monitoring\n- **S3 Buckets**:\n - Enable encryption at rest\n - Use bucket policies to restrict access\n - Enable versioning for compiled models\n- **ALB**:\n - Use HTTPS with ACM certificates in production\n - Enable access logs for auditing\n\n## Contributing\n\nContributions are welcome! Please feel free to submit a Pull Request.\n\n## License\n\nThis library is licensed under the Apache-2.0 License. See the [LICENSE](./LICENSE) file.\n"
|
|
8575
8575
|
},
|
|
8576
8576
|
"repository": {
|
|
8577
8577
|
"type": "git",
|
|
@@ -8866,6 +8866,56 @@
|
|
|
8866
8866
|
"name": "ChatTemplateContentFormat",
|
|
8867
8867
|
"symbolId": "src/base/server-engine/vllm-engine/vllm-engine-argments:ChatTemplateContentFormat"
|
|
8868
8868
|
},
|
|
8869
|
+
"aws-cdk-neuronx-patterns.ComputeEnvironmentResult": {
|
|
8870
|
+
"assembly": "aws-cdk-neuronx-patterns",
|
|
8871
|
+
"datatype": true,
|
|
8872
|
+
"docs": {
|
|
8873
|
+
"stability": "stable",
|
|
8874
|
+
"summary": "Result of creating a compute environment."
|
|
8875
|
+
},
|
|
8876
|
+
"fqn": "aws-cdk-neuronx-patterns.ComputeEnvironmentResult",
|
|
8877
|
+
"kind": "interface",
|
|
8878
|
+
"locationInModule": {
|
|
8879
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
8880
|
+
"line": 144
|
|
8881
|
+
},
|
|
8882
|
+
"name": "ComputeEnvironmentResult",
|
|
8883
|
+
"properties": [
|
|
8884
|
+
{
|
|
8885
|
+
"abstract": true,
|
|
8886
|
+
"docs": {
|
|
8887
|
+
"stability": "stable",
|
|
8888
|
+
"summary": "The compute environment."
|
|
8889
|
+
},
|
|
8890
|
+
"immutable": true,
|
|
8891
|
+
"locationInModule": {
|
|
8892
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
8893
|
+
"line": 148
|
|
8894
|
+
},
|
|
8895
|
+
"name": "computeEnvironment",
|
|
8896
|
+
"type": {
|
|
8897
|
+
"fqn": "aws-cdk-lib.aws_batch.IComputeEnvironment"
|
|
8898
|
+
}
|
|
8899
|
+
},
|
|
8900
|
+
{
|
|
8901
|
+
"abstract": true,
|
|
8902
|
+
"docs": {
|
|
8903
|
+
"stability": "stable",
|
|
8904
|
+
"summary": "The instance role associated with the compute environment."
|
|
8905
|
+
},
|
|
8906
|
+
"immutable": true,
|
|
8907
|
+
"locationInModule": {
|
|
8908
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
8909
|
+
"line": 152
|
|
8910
|
+
},
|
|
8911
|
+
"name": "instanceRole",
|
|
8912
|
+
"type": {
|
|
8913
|
+
"fqn": "aws-cdk-lib.aws_iam.IRole"
|
|
8914
|
+
}
|
|
8915
|
+
}
|
|
8916
|
+
],
|
|
8917
|
+
"symbolId": "src/base/neuronx-compiler/neuronx-compiler-base:ComputeEnvironmentResult"
|
|
8918
|
+
},
|
|
8869
8919
|
"aws-cdk-neuronx-patterns.ConfigFormat": {
|
|
8870
8920
|
"assembly": "aws-cdk-neuronx-patterns",
|
|
8871
8921
|
"docs": {
|
|
@@ -9221,6 +9271,39 @@
|
|
|
9221
9271
|
],
|
|
9222
9272
|
"symbolId": "src/base/neuronx/neuronx-instance-type:IAcceleratorChips"
|
|
9223
9273
|
},
|
|
9274
|
+
"aws-cdk-neuronx-patterns.INeuronxCompiler": {
|
|
9275
|
+
"assembly": "aws-cdk-neuronx-patterns",
|
|
9276
|
+
"docs": {
|
|
9277
|
+
"stability": "stable",
|
|
9278
|
+
"summary": "Interface for Neuronx compilers."
|
|
9279
|
+
},
|
|
9280
|
+
"fqn": "aws-cdk-neuronx-patterns.INeuronxCompiler",
|
|
9281
|
+
"kind": "interface",
|
|
9282
|
+
"locationInModule": {
|
|
9283
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
9284
|
+
"line": 72
|
|
9285
|
+
},
|
|
9286
|
+
"methods": [
|
|
9287
|
+
{
|
|
9288
|
+
"abstract": true,
|
|
9289
|
+
"docs": {
|
|
9290
|
+
"stability": "stable"
|
|
9291
|
+
},
|
|
9292
|
+
"locationInModule": {
|
|
9293
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
9294
|
+
"line": 73
|
|
9295
|
+
},
|
|
9296
|
+
"name": "compile",
|
|
9297
|
+
"returns": {
|
|
9298
|
+
"type": {
|
|
9299
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompiledModel"
|
|
9300
|
+
}
|
|
9301
|
+
}
|
|
9302
|
+
}
|
|
9303
|
+
],
|
|
9304
|
+
"name": "INeuronxCompiler",
|
|
9305
|
+
"symbolId": "src/base/neuronx-compiler/neuronx-compiler-base:INeuronxCompiler"
|
|
9306
|
+
},
|
|
9224
9307
|
"aws-cdk-neuronx-patterns.INeuronxContainerImage": {
|
|
9225
9308
|
"assembly": "aws-cdk-neuronx-patterns",
|
|
9226
9309
|
"docs": {
|
|
@@ -9230,8 +9313,8 @@
|
|
|
9230
9313
|
"fqn": "aws-cdk-neuronx-patterns.INeuronxContainerImage",
|
|
9231
9314
|
"kind": "interface",
|
|
9232
9315
|
"locationInModule": {
|
|
9233
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
9234
|
-
"line":
|
|
9316
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
9317
|
+
"line": 28
|
|
9235
9318
|
},
|
|
9236
9319
|
"name": "INeuronxContainerImage",
|
|
9237
9320
|
"properties": [
|
|
@@ -9243,8 +9326,8 @@
|
|
|
9243
9326
|
},
|
|
9244
9327
|
"immutable": true,
|
|
9245
9328
|
"locationInModule": {
|
|
9246
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
9247
|
-
"line":
|
|
9329
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
9330
|
+
"line": 32
|
|
9248
9331
|
},
|
|
9249
9332
|
"name": "image",
|
|
9250
9333
|
"type": {
|
|
@@ -9259,8 +9342,8 @@
|
|
|
9259
9342
|
},
|
|
9260
9343
|
"immutable": true,
|
|
9261
9344
|
"locationInModule": {
|
|
9262
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
9263
|
-
"line":
|
|
9345
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
9346
|
+
"line": 36
|
|
9264
9347
|
},
|
|
9265
9348
|
"name": "neuronSdkVersion",
|
|
9266
9349
|
"type": {
|
|
@@ -9268,7 +9351,7 @@
|
|
|
9268
9351
|
}
|
|
9269
9352
|
}
|
|
9270
9353
|
],
|
|
9271
|
-
"symbolId": "src/base/neuronx-compiler/neuronx-compiler:INeuronxContainerImage"
|
|
9354
|
+
"symbolId": "src/base/neuronx-compiler/neuronx-compiler-base:INeuronxContainerImage"
|
|
9272
9355
|
},
|
|
9273
9356
|
"aws-cdk-neuronx-patterns.INeuronxImage": {
|
|
9274
9357
|
"assembly": "aws-cdk-neuronx-patterns",
|
|
@@ -9340,7 +9423,7 @@
|
|
|
9340
9423
|
"kind": "interface",
|
|
9341
9424
|
"locationInModule": {
|
|
9342
9425
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
9343
|
-
"line":
|
|
9426
|
+
"line": 42
|
|
9344
9427
|
},
|
|
9345
9428
|
"name": "INeuronxInstanceType",
|
|
9346
9429
|
"properties": [
|
|
@@ -9352,7 +9435,7 @@
|
|
|
9352
9435
|
"immutable": true,
|
|
9353
9436
|
"locationInModule": {
|
|
9354
9437
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
9355
|
-
"line":
|
|
9438
|
+
"line": 47
|
|
9356
9439
|
},
|
|
9357
9440
|
"name": "acceleratorChips",
|
|
9358
9441
|
"type": {
|
|
@@ -9367,7 +9450,7 @@
|
|
|
9367
9450
|
"immutable": true,
|
|
9368
9451
|
"locationInModule": {
|
|
9369
9452
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
9370
|
-
"line":
|
|
9453
|
+
"line": 44
|
|
9371
9454
|
},
|
|
9372
9455
|
"name": "instanceType",
|
|
9373
9456
|
"type": {
|
|
@@ -9382,7 +9465,7 @@
|
|
|
9382
9465
|
"immutable": true,
|
|
9383
9466
|
"locationInModule": {
|
|
9384
9467
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
9385
|
-
"line":
|
|
9468
|
+
"line": 46
|
|
9386
9469
|
},
|
|
9387
9470
|
"name": "memory",
|
|
9388
9471
|
"type": {
|
|
@@ -9397,7 +9480,7 @@
|
|
|
9397
9480
|
"immutable": true,
|
|
9398
9481
|
"locationInModule": {
|
|
9399
9482
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
9400
|
-
"line":
|
|
9483
|
+
"line": 43
|
|
9401
9484
|
},
|
|
9402
9485
|
"name": "supportedTensorParallelism",
|
|
9403
9486
|
"type": {
|
|
@@ -9417,7 +9500,7 @@
|
|
|
9417
9500
|
"immutable": true,
|
|
9418
9501
|
"locationInModule": {
|
|
9419
9502
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
9420
|
-
"line":
|
|
9503
|
+
"line": 45
|
|
9421
9504
|
},
|
|
9422
9505
|
"name": "vCpu",
|
|
9423
9506
|
"type": {
|
|
@@ -10969,13 +11052,14 @@
|
|
|
10969
11052
|
"assembly": "aws-cdk-neuronx-patterns",
|
|
10970
11053
|
"datatype": true,
|
|
10971
11054
|
"docs": {
|
|
10972
|
-
"stability": "stable"
|
|
11055
|
+
"stability": "stable",
|
|
11056
|
+
"summary": "The model compiled by Neuronx compiler."
|
|
10973
11057
|
},
|
|
10974
11058
|
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompiledModel",
|
|
10975
11059
|
"kind": "interface",
|
|
10976
11060
|
"locationInModule": {
|
|
10977
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
10978
|
-
"line":
|
|
11061
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11062
|
+
"line": 42
|
|
10979
11063
|
},
|
|
10980
11064
|
"name": "NeuronxCompiledModel",
|
|
10981
11065
|
"properties": [
|
|
@@ -10987,8 +11071,8 @@
|
|
|
10987
11071
|
},
|
|
10988
11072
|
"immutable": true,
|
|
10989
11073
|
"locationInModule": {
|
|
10990
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
10991
|
-
"line":
|
|
11074
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11075
|
+
"line": 50
|
|
10992
11076
|
},
|
|
10993
11077
|
"name": "bucket",
|
|
10994
11078
|
"type": {
|
|
@@ -10998,32 +11082,33 @@
|
|
|
10998
11082
|
{
|
|
10999
11083
|
"abstract": true,
|
|
11000
11084
|
"docs": {
|
|
11001
|
-
"stability": "stable"
|
|
11085
|
+
"stability": "stable",
|
|
11086
|
+
"summary": "The model name."
|
|
11002
11087
|
},
|
|
11003
11088
|
"immutable": true,
|
|
11004
11089
|
"locationInModule": {
|
|
11005
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11006
|
-
"line":
|
|
11090
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11091
|
+
"line": 62
|
|
11007
11092
|
},
|
|
11008
|
-
"name": "
|
|
11093
|
+
"name": "modelName",
|
|
11009
11094
|
"type": {
|
|
11010
|
-
"
|
|
11095
|
+
"primitive": "string"
|
|
11011
11096
|
}
|
|
11012
11097
|
},
|
|
11013
11098
|
{
|
|
11014
11099
|
"abstract": true,
|
|
11015
11100
|
"docs": {
|
|
11016
11101
|
"stability": "stable",
|
|
11017
|
-
"summary": "The model
|
|
11102
|
+
"summary": "The recommended Neuron instance type for running inference with this compiled model."
|
|
11018
11103
|
},
|
|
11019
11104
|
"immutable": true,
|
|
11020
11105
|
"locationInModule": {
|
|
11021
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11022
|
-
"line":
|
|
11106
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11107
|
+
"line": 46
|
|
11023
11108
|
},
|
|
11024
|
-
"name": "
|
|
11109
|
+
"name": "recommendedInstanceType",
|
|
11025
11110
|
"type": {
|
|
11026
|
-
"
|
|
11111
|
+
"fqn": "aws-cdk-neuronx-patterns.INeuronxInstanceType"
|
|
11027
11112
|
}
|
|
11028
11113
|
},
|
|
11029
11114
|
{
|
|
@@ -11034,8 +11119,8 @@
|
|
|
11034
11119
|
},
|
|
11035
11120
|
"immutable": true,
|
|
11036
11121
|
"locationInModule": {
|
|
11037
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11038
|
-
"line":
|
|
11122
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11123
|
+
"line": 58
|
|
11039
11124
|
},
|
|
11040
11125
|
"name": "s3Prefix",
|
|
11041
11126
|
"type": {
|
|
@@ -11050,8 +11135,8 @@
|
|
|
11050
11135
|
},
|
|
11051
11136
|
"immutable": true,
|
|
11052
11137
|
"locationInModule": {
|
|
11053
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11054
|
-
"line":
|
|
11138
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11139
|
+
"line": 54
|
|
11055
11140
|
},
|
|
11056
11141
|
"name": "s3Uri",
|
|
11057
11142
|
"type": {
|
|
@@ -11061,12 +11146,13 @@
|
|
|
11061
11146
|
{
|
|
11062
11147
|
"abstract": true,
|
|
11063
11148
|
"docs": {
|
|
11064
|
-
"stability": "stable"
|
|
11149
|
+
"stability": "stable",
|
|
11150
|
+
"summary": "The weight size of the model."
|
|
11065
11151
|
},
|
|
11066
11152
|
"immutable": true,
|
|
11067
11153
|
"locationInModule": {
|
|
11068
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11069
|
-
"line":
|
|
11154
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11155
|
+
"line": 66
|
|
11070
11156
|
},
|
|
11071
11157
|
"name": "weightSize",
|
|
11072
11158
|
"type": {
|
|
@@ -11074,24 +11160,25 @@
|
|
|
11074
11160
|
}
|
|
11075
11161
|
}
|
|
11076
11162
|
],
|
|
11077
|
-
"symbolId": "src/base/neuronx-compiler/neuronx-compiler:NeuronxCompiledModel"
|
|
11163
|
+
"symbolId": "src/base/neuronx-compiler/neuronx-compiler-base:NeuronxCompiledModel"
|
|
11078
11164
|
},
|
|
11079
|
-
"aws-cdk-neuronx-patterns.
|
|
11165
|
+
"aws-cdk-neuronx-patterns.NeuronxCompilerBase": {
|
|
11166
|
+
"abstract": true,
|
|
11080
11167
|
"assembly": "aws-cdk-neuronx-patterns",
|
|
11081
11168
|
"base": "constructs.Construct",
|
|
11082
11169
|
"docs": {
|
|
11083
|
-
"remarks": "
|
|
11170
|
+
"remarks": "Provides the common orchestration logic (Lambda, CustomResource, WaitCondition)\nwhile subclasses define how to create the Batch compute environment and job definition.",
|
|
11084
11171
|
"stability": "stable",
|
|
11085
|
-
"summary": "Neuronx
|
|
11172
|
+
"summary": "Abstract base class for Neuronx compilers."
|
|
11086
11173
|
},
|
|
11087
|
-
"fqn": "aws-cdk-neuronx-patterns.
|
|
11174
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompilerBase",
|
|
11088
11175
|
"initializer": {
|
|
11089
11176
|
"docs": {
|
|
11090
11177
|
"stability": "stable"
|
|
11091
11178
|
},
|
|
11092
11179
|
"locationInModule": {
|
|
11093
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11094
|
-
"line":
|
|
11180
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11181
|
+
"line": 172
|
|
11095
11182
|
},
|
|
11096
11183
|
"parameters": [
|
|
11097
11184
|
{
|
|
@@ -11109,15 +11196,18 @@
|
|
|
11109
11196
|
{
|
|
11110
11197
|
"name": "props",
|
|
11111
11198
|
"type": {
|
|
11112
|
-
"fqn": "aws-cdk-neuronx-patterns.
|
|
11199
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompilerBaseProps"
|
|
11113
11200
|
}
|
|
11114
11201
|
}
|
|
11115
11202
|
]
|
|
11116
11203
|
},
|
|
11204
|
+
"interfaces": [
|
|
11205
|
+
"aws-cdk-neuronx-patterns.INeuronxCompiler"
|
|
11206
|
+
],
|
|
11117
11207
|
"kind": "class",
|
|
11118
11208
|
"locationInModule": {
|
|
11119
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11120
|
-
"line":
|
|
11209
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11210
|
+
"line": 160
|
|
11121
11211
|
},
|
|
11122
11212
|
"methods": [
|
|
11123
11213
|
{
|
|
@@ -11125,34 +11215,166 @@
|
|
|
11125
11215
|
"stability": "stable"
|
|
11126
11216
|
},
|
|
11127
11217
|
"locationInModule": {
|
|
11128
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11129
|
-
"line":
|
|
11218
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11219
|
+
"line": 284
|
|
11130
11220
|
},
|
|
11131
11221
|
"name": "compile",
|
|
11222
|
+
"overrides": "aws-cdk-neuronx-patterns.INeuronxCompiler",
|
|
11132
11223
|
"returns": {
|
|
11133
11224
|
"type": {
|
|
11134
11225
|
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompiledModel"
|
|
11135
11226
|
}
|
|
11136
11227
|
}
|
|
11228
|
+
},
|
|
11229
|
+
{
|
|
11230
|
+
"abstract": true,
|
|
11231
|
+
"docs": {
|
|
11232
|
+
"remarks": "Subclasses must implement this to provide the appropriate compute environment.",
|
|
11233
|
+
"stability": "stable",
|
|
11234
|
+
"summary": "Create the Batch compute environment."
|
|
11235
|
+
},
|
|
11236
|
+
"locationInModule": {
|
|
11237
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11238
|
+
"line": 251
|
|
11239
|
+
},
|
|
11240
|
+
"name": "createComputeEnvironment",
|
|
11241
|
+
"parameters": [
|
|
11242
|
+
{
|
|
11243
|
+
"name": "props",
|
|
11244
|
+
"type": {
|
|
11245
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompilerBaseProps"
|
|
11246
|
+
}
|
|
11247
|
+
}
|
|
11248
|
+
],
|
|
11249
|
+
"protected": true,
|
|
11250
|
+
"returns": {
|
|
11251
|
+
"type": {
|
|
11252
|
+
"fqn": "aws-cdk-neuronx-patterns.ComputeEnvironmentResult"
|
|
11253
|
+
}
|
|
11254
|
+
}
|
|
11255
|
+
},
|
|
11256
|
+
{
|
|
11257
|
+
"abstract": true,
|
|
11258
|
+
"docs": {
|
|
11259
|
+
"remarks": "Subclasses must implement this to provide the appropriate job definition.",
|
|
11260
|
+
"stability": "stable",
|
|
11261
|
+
"summary": "Create the Batch job definition."
|
|
11262
|
+
},
|
|
11263
|
+
"locationInModule": {
|
|
11264
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11265
|
+
"line": 259
|
|
11266
|
+
},
|
|
11267
|
+
"name": "createJobDefinition",
|
|
11268
|
+
"parameters": [
|
|
11269
|
+
{
|
|
11270
|
+
"name": "props",
|
|
11271
|
+
"type": {
|
|
11272
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompilerBaseProps"
|
|
11273
|
+
}
|
|
11274
|
+
}
|
|
11275
|
+
],
|
|
11276
|
+
"protected": true,
|
|
11277
|
+
"returns": {
|
|
11278
|
+
"type": {
|
|
11279
|
+
"fqn": "aws-cdk-lib.aws_batch.IJobDefinition"
|
|
11280
|
+
}
|
|
11281
|
+
}
|
|
11137
11282
|
}
|
|
11138
11283
|
],
|
|
11139
|
-
"name": "
|
|
11140
|
-
"
|
|
11284
|
+
"name": "NeuronxCompilerBase",
|
|
11285
|
+
"properties": [
|
|
11286
|
+
{
|
|
11287
|
+
"docs": {
|
|
11288
|
+
"stability": "stable"
|
|
11289
|
+
},
|
|
11290
|
+
"immutable": true,
|
|
11291
|
+
"locationInModule": {
|
|
11292
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11293
|
+
"line": 166
|
|
11294
|
+
},
|
|
11295
|
+
"name": "artifactS3Prefix",
|
|
11296
|
+
"protected": true,
|
|
11297
|
+
"type": {
|
|
11298
|
+
"primitive": "string"
|
|
11299
|
+
}
|
|
11300
|
+
},
|
|
11301
|
+
{
|
|
11302
|
+
"docs": {
|
|
11303
|
+
"stability": "stable"
|
|
11304
|
+
},
|
|
11305
|
+
"immutable": true,
|
|
11306
|
+
"locationInModule": {
|
|
11307
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11308
|
+
"line": 170
|
|
11309
|
+
},
|
|
11310
|
+
"name": "bucket",
|
|
11311
|
+
"protected": true,
|
|
11312
|
+
"type": {
|
|
11313
|
+
"fqn": "aws-cdk-lib.aws_s3.IBucket"
|
|
11314
|
+
}
|
|
11315
|
+
},
|
|
11316
|
+
{
|
|
11317
|
+
"docs": {
|
|
11318
|
+
"stability": "stable"
|
|
11319
|
+
},
|
|
11320
|
+
"immutable": true,
|
|
11321
|
+
"locationInModule": {
|
|
11322
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11323
|
+
"line": 169
|
|
11324
|
+
},
|
|
11325
|
+
"name": "model",
|
|
11326
|
+
"protected": true,
|
|
11327
|
+
"type": {
|
|
11328
|
+
"fqn": "aws-cdk-neuronx-patterns.Model"
|
|
11329
|
+
}
|
|
11330
|
+
},
|
|
11331
|
+
{
|
|
11332
|
+
"docs": {
|
|
11333
|
+
"stability": "stable"
|
|
11334
|
+
},
|
|
11335
|
+
"immutable": true,
|
|
11336
|
+
"locationInModule": {
|
|
11337
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11338
|
+
"line": 168
|
|
11339
|
+
},
|
|
11340
|
+
"name": "neuronxInstanceType",
|
|
11341
|
+
"protected": true,
|
|
11342
|
+
"type": {
|
|
11343
|
+
"fqn": "aws-cdk-neuronx-patterns.INeuronxInstanceType"
|
|
11344
|
+
}
|
|
11345
|
+
},
|
|
11346
|
+
{
|
|
11347
|
+
"docs": {
|
|
11348
|
+
"stability": "stable"
|
|
11349
|
+
},
|
|
11350
|
+
"immutable": true,
|
|
11351
|
+
"locationInModule": {
|
|
11352
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11353
|
+
"line": 167
|
|
11354
|
+
},
|
|
11355
|
+
"name": "weightSize",
|
|
11356
|
+
"protected": true,
|
|
11357
|
+
"type": {
|
|
11358
|
+
"fqn": "aws-cdk-lib.Size"
|
|
11359
|
+
}
|
|
11360
|
+
}
|
|
11361
|
+
],
|
|
11362
|
+
"symbolId": "src/base/neuronx-compiler/neuronx-compiler-base:NeuronxCompilerBase"
|
|
11141
11363
|
},
|
|
11142
|
-
"aws-cdk-neuronx-patterns.
|
|
11364
|
+
"aws-cdk-neuronx-patterns.NeuronxCompilerBaseProps": {
|
|
11143
11365
|
"assembly": "aws-cdk-neuronx-patterns",
|
|
11144
11366
|
"datatype": true,
|
|
11145
11367
|
"docs": {
|
|
11146
11368
|
"stability": "stable",
|
|
11147
|
-
"summary": "
|
|
11369
|
+
"summary": "Common props for NeuronxCompilerBase."
|
|
11148
11370
|
},
|
|
11149
|
-
"fqn": "aws-cdk-neuronx-patterns.
|
|
11371
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompilerBaseProps",
|
|
11150
11372
|
"kind": "interface",
|
|
11151
11373
|
"locationInModule": {
|
|
11152
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11153
|
-
"line":
|
|
11374
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11375
|
+
"line": 79
|
|
11154
11376
|
},
|
|
11155
|
-
"name": "
|
|
11377
|
+
"name": "NeuronxCompilerBaseProps",
|
|
11156
11378
|
"properties": [
|
|
11157
11379
|
{
|
|
11158
11380
|
"abstract": true,
|
|
@@ -11163,8 +11385,8 @@
|
|
|
11163
11385
|
},
|
|
11164
11386
|
"immutable": true,
|
|
11165
11387
|
"locationInModule": {
|
|
11166
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11167
|
-
"line":
|
|
11388
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11389
|
+
"line": 96
|
|
11168
11390
|
},
|
|
11169
11391
|
"name": "artifactS3Prefix",
|
|
11170
11392
|
"type": {
|
|
@@ -11179,8 +11401,8 @@
|
|
|
11179
11401
|
},
|
|
11180
11402
|
"immutable": true,
|
|
11181
11403
|
"locationInModule": {
|
|
11182
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11183
|
-
"line":
|
|
11404
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11405
|
+
"line": 87
|
|
11184
11406
|
},
|
|
11185
11407
|
"name": "bucket",
|
|
11186
11408
|
"type": {
|
|
@@ -11195,8 +11417,8 @@
|
|
|
11195
11417
|
},
|
|
11196
11418
|
"immutable": true,
|
|
11197
11419
|
"locationInModule": {
|
|
11198
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11199
|
-
"line":
|
|
11420
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11421
|
+
"line": 108
|
|
11200
11422
|
},
|
|
11201
11423
|
"name": "image",
|
|
11202
11424
|
"type": {
|
|
@@ -11211,8 +11433,8 @@
|
|
|
11211
11433
|
},
|
|
11212
11434
|
"immutable": true,
|
|
11213
11435
|
"locationInModule": {
|
|
11214
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11215
|
-
"line":
|
|
11436
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11437
|
+
"line": 104
|
|
11216
11438
|
},
|
|
11217
11439
|
"name": "model",
|
|
11218
11440
|
"type": {
|
|
@@ -11227,8 +11449,8 @@
|
|
|
11227
11449
|
},
|
|
11228
11450
|
"immutable": true,
|
|
11229
11451
|
"locationInModule": {
|
|
11230
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11231
|
-
"line":
|
|
11452
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11453
|
+
"line": 100
|
|
11232
11454
|
},
|
|
11233
11455
|
"name": "neuronxInstanceType",
|
|
11234
11456
|
"type": {
|
|
@@ -11243,8 +11465,8 @@
|
|
|
11243
11465
|
},
|
|
11244
11466
|
"immutable": true,
|
|
11245
11467
|
"locationInModule": {
|
|
11246
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11247
|
-
"line":
|
|
11468
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11469
|
+
"line": 83
|
|
11248
11470
|
},
|
|
11249
11471
|
"name": "vpc",
|
|
11250
11472
|
"type": {
|
|
@@ -11254,12 +11476,13 @@
|
|
|
11254
11476
|
{
|
|
11255
11477
|
"abstract": true,
|
|
11256
11478
|
"docs": {
|
|
11257
|
-
"stability": "stable"
|
|
11479
|
+
"stability": "stable",
|
|
11480
|
+
"summary": "The command to run in the container."
|
|
11258
11481
|
},
|
|
11259
11482
|
"immutable": true,
|
|
11260
11483
|
"locationInModule": {
|
|
11261
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11262
|
-
"line":
|
|
11484
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11485
|
+
"line": 112
|
|
11263
11486
|
},
|
|
11264
11487
|
"name": "command",
|
|
11265
11488
|
"optional": true,
|
|
@@ -11282,8 +11505,8 @@
|
|
|
11282
11505
|
},
|
|
11283
11506
|
"immutable": true,
|
|
11284
11507
|
"locationInModule": {
|
|
11285
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11286
|
-
"line":
|
|
11508
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11509
|
+
"line": 136
|
|
11287
11510
|
},
|
|
11288
11511
|
"name": "environment",
|
|
11289
11512
|
"optional": true,
|
|
@@ -11304,8 +11527,8 @@
|
|
|
11304
11527
|
},
|
|
11305
11528
|
"immutable": true,
|
|
11306
11529
|
"locationInModule": {
|
|
11307
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11308
|
-
"line":
|
|
11530
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11531
|
+
"line": 91
|
|
11309
11532
|
},
|
|
11310
11533
|
"name": "secrets",
|
|
11311
11534
|
"optional": true,
|
|
@@ -11328,8 +11551,8 @@
|
|
|
11328
11551
|
},
|
|
11329
11552
|
"immutable": true,
|
|
11330
11553
|
"locationInModule": {
|
|
11331
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11332
|
-
"line":
|
|
11554
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11555
|
+
"line": 123
|
|
11333
11556
|
},
|
|
11334
11557
|
"name": "spot",
|
|
11335
11558
|
"optional": true,
|
|
@@ -11340,14 +11563,14 @@
|
|
|
11340
11563
|
{
|
|
11341
11564
|
"abstract": true,
|
|
11342
11565
|
"docs": {
|
|
11343
|
-
"default": "- N
|
|
11566
|
+
"default": "- N billion parameters * 5GiB EBS",
|
|
11344
11567
|
"stability": "stable",
|
|
11345
11568
|
"summary": "The root volume of worker instance."
|
|
11346
11569
|
},
|
|
11347
11570
|
"immutable": true,
|
|
11348
11571
|
"locationInModule": {
|
|
11349
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11350
|
-
"line":
|
|
11572
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11573
|
+
"line": 117
|
|
11351
11574
|
},
|
|
11352
11575
|
"name": "volumeSize",
|
|
11353
11576
|
"optional": true,
|
|
@@ -11358,23 +11581,165 @@
|
|
|
11358
11581
|
{
|
|
11359
11582
|
"abstract": true,
|
|
11360
11583
|
"docs": {
|
|
11361
|
-
"default": "- new subnets will be created",
|
|
11584
|
+
"default": "- new subnets will be created",
|
|
11585
|
+
"stability": "stable",
|
|
11586
|
+
"summary": "The VPC Subnets this Compute Environment will launch instances in."
|
|
11587
|
+
},
|
|
11588
|
+
"immutable": true,
|
|
11589
|
+
"locationInModule": {
|
|
11590
|
+
"filename": "src/base/neuronx-compiler/neuronx-compiler-base.ts",
|
|
11591
|
+
"line": 129
|
|
11592
|
+
},
|
|
11593
|
+
"name": "vpcSubnets",
|
|
11594
|
+
"optional": true,
|
|
11595
|
+
"type": {
|
|
11596
|
+
"fqn": "aws-cdk-lib.aws_ec2.SubnetSelection"
|
|
11597
|
+
}
|
|
11598
|
+
}
|
|
11599
|
+
],
|
|
11600
|
+
"symbolId": "src/base/neuronx-compiler/neuronx-compiler-base:NeuronxCompilerBaseProps"
|
|
11601
|
+
},
|
|
11602
|
+
"aws-cdk-neuronx-patterns.NeuronxCrossCompiler": {
|
|
11603
|
+
"assembly": "aws-cdk-neuronx-patterns",
|
|
11604
|
+
"base": "aws-cdk-neuronx-patterns.NeuronxCompilerBase",
|
|
11605
|
+
"docs": {
|
|
11606
|
+
"remarks": "Compile the model on a non-Neuron instance and upload the artifacts to an S3 bucket.\nThis avoids the need for expensive Neuron instances during the compilation phase.\n\nThe compilation uses `vllm serve` which performs model tracing and neuronx-cc compilation\nentirely on CPU. The resulting artifacts are compatible with Neuron instances for inference.",
|
|
11607
|
+
"stability": "stable",
|
|
11608
|
+
"summary": "Neuronx cross-compiler construct."
|
|
11609
|
+
},
|
|
11610
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCrossCompiler",
|
|
11611
|
+
"initializer": {
|
|
11612
|
+
"docs": {
|
|
11613
|
+
"stability": "stable"
|
|
11614
|
+
},
|
|
11615
|
+
"locationInModule": {
|
|
11616
|
+
"filename": "src/base/neuronx-compiler/neuronx-cross-compiler.ts",
|
|
11617
|
+
"line": 38
|
|
11618
|
+
},
|
|
11619
|
+
"parameters": [
|
|
11620
|
+
{
|
|
11621
|
+
"name": "scope",
|
|
11622
|
+
"type": {
|
|
11623
|
+
"fqn": "constructs.Construct"
|
|
11624
|
+
}
|
|
11625
|
+
},
|
|
11626
|
+
{
|
|
11627
|
+
"name": "id",
|
|
11628
|
+
"type": {
|
|
11629
|
+
"primitive": "string"
|
|
11630
|
+
}
|
|
11631
|
+
},
|
|
11632
|
+
{
|
|
11633
|
+
"name": "props",
|
|
11634
|
+
"type": {
|
|
11635
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCrossCompilerProps"
|
|
11636
|
+
}
|
|
11637
|
+
}
|
|
11638
|
+
]
|
|
11639
|
+
},
|
|
11640
|
+
"kind": "class",
|
|
11641
|
+
"locationInModule": {
|
|
11642
|
+
"filename": "src/base/neuronx-compiler/neuronx-cross-compiler.ts",
|
|
11643
|
+
"line": 37
|
|
11644
|
+
},
|
|
11645
|
+
"methods": [
|
|
11646
|
+
{
|
|
11647
|
+
"docs": {
|
|
11648
|
+
"remarks": "Subclasses must implement this to provide the appropriate compute environment.",
|
|
11649
|
+
"stability": "stable",
|
|
11650
|
+
"summary": "Create the Batch compute environment."
|
|
11651
|
+
},
|
|
11652
|
+
"locationInModule": {
|
|
11653
|
+
"filename": "src/base/neuronx-compiler/neuronx-cross-compiler.ts",
|
|
11654
|
+
"line": 42
|
|
11655
|
+
},
|
|
11656
|
+
"name": "createComputeEnvironment",
|
|
11657
|
+
"overrides": "aws-cdk-neuronx-patterns.NeuronxCompilerBase",
|
|
11658
|
+
"parameters": [
|
|
11659
|
+
{
|
|
11660
|
+
"name": "props",
|
|
11661
|
+
"type": {
|
|
11662
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompilerBaseProps"
|
|
11663
|
+
}
|
|
11664
|
+
}
|
|
11665
|
+
],
|
|
11666
|
+
"protected": true,
|
|
11667
|
+
"returns": {
|
|
11668
|
+
"type": {
|
|
11669
|
+
"fqn": "aws-cdk-neuronx-patterns.ComputeEnvironmentResult"
|
|
11670
|
+
}
|
|
11671
|
+
}
|
|
11672
|
+
},
|
|
11673
|
+
{
|
|
11674
|
+
"docs": {
|
|
11675
|
+
"remarks": "Subclasses must implement this to provide the appropriate job definition.",
|
|
11676
|
+
"stability": "stable",
|
|
11677
|
+
"summary": "Create the Batch job definition."
|
|
11678
|
+
},
|
|
11679
|
+
"locationInModule": {
|
|
11680
|
+
"filename": "src/base/neuronx-compiler/neuronx-cross-compiler.ts",
|
|
11681
|
+
"line": 97
|
|
11682
|
+
},
|
|
11683
|
+
"name": "createJobDefinition",
|
|
11684
|
+
"overrides": "aws-cdk-neuronx-patterns.NeuronxCompilerBase",
|
|
11685
|
+
"parameters": [
|
|
11686
|
+
{
|
|
11687
|
+
"name": "props",
|
|
11688
|
+
"type": {
|
|
11689
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompilerBaseProps"
|
|
11690
|
+
}
|
|
11691
|
+
}
|
|
11692
|
+
],
|
|
11693
|
+
"protected": true,
|
|
11694
|
+
"returns": {
|
|
11695
|
+
"type": {
|
|
11696
|
+
"fqn": "aws-cdk-lib.aws_batch.IJobDefinition"
|
|
11697
|
+
}
|
|
11698
|
+
}
|
|
11699
|
+
}
|
|
11700
|
+
],
|
|
11701
|
+
"name": "NeuronxCrossCompiler",
|
|
11702
|
+
"symbolId": "src/base/neuronx-compiler/neuronx-cross-compiler:NeuronxCrossCompiler"
|
|
11703
|
+
},
|
|
11704
|
+
"aws-cdk-neuronx-patterns.NeuronxCrossCompilerProps": {
|
|
11705
|
+
"assembly": "aws-cdk-neuronx-patterns",
|
|
11706
|
+
"datatype": true,
|
|
11707
|
+
"docs": {
|
|
11708
|
+
"stability": "stable",
|
|
11709
|
+
"summary": "Props of NeuronxCrossCompiler."
|
|
11710
|
+
},
|
|
11711
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCrossCompilerProps",
|
|
11712
|
+
"interfaces": [
|
|
11713
|
+
"aws-cdk-neuronx-patterns.NeuronxCompilerBaseProps"
|
|
11714
|
+
],
|
|
11715
|
+
"kind": "interface",
|
|
11716
|
+
"locationInModule": {
|
|
11717
|
+
"filename": "src/base/neuronx-compiler/neuronx-cross-compiler.ts",
|
|
11718
|
+
"line": 18
|
|
11719
|
+
},
|
|
11720
|
+
"name": "NeuronxCrossCompilerProps",
|
|
11721
|
+
"properties": [
|
|
11722
|
+
{
|
|
11723
|
+
"abstract": true,
|
|
11724
|
+
"docs": {
|
|
11725
|
+
"default": "ec2.InstanceType.of(ec2.InstanceClass.C7I, ec2.InstanceSize.XLARGE4)",
|
|
11726
|
+
"remarks": "This should be a non-Neuron instance type with sufficient memory and CPU\nfor model compilation.",
|
|
11362
11727
|
"stability": "stable",
|
|
11363
|
-
"summary": "The
|
|
11728
|
+
"summary": "The EC2 instance type to use for cross-compilation."
|
|
11364
11729
|
},
|
|
11365
11730
|
"immutable": true,
|
|
11366
11731
|
"locationInModule": {
|
|
11367
|
-
"filename": "src/base/neuronx-compiler/neuronx-compiler.ts",
|
|
11368
|
-
"line":
|
|
11732
|
+
"filename": "src/base/neuronx-compiler/neuronx-cross-compiler.ts",
|
|
11733
|
+
"line": 26
|
|
11369
11734
|
},
|
|
11370
|
-
"name": "
|
|
11735
|
+
"name": "compileInstanceType",
|
|
11371
11736
|
"optional": true,
|
|
11372
11737
|
"type": {
|
|
11373
|
-
"fqn": "aws-cdk-lib.aws_ec2.
|
|
11738
|
+
"fqn": "aws-cdk-lib.aws_ec2.InstanceType"
|
|
11374
11739
|
}
|
|
11375
11740
|
}
|
|
11376
11741
|
],
|
|
11377
|
-
"symbolId": "src/base/neuronx-compiler/neuronx-compiler:
|
|
11742
|
+
"symbolId": "src/base/neuronx-compiler/neuronx-cross-compiler:NeuronxCrossCompilerProps"
|
|
11378
11743
|
},
|
|
11379
11744
|
"aws-cdk-neuronx-patterns.NeuronxInstanceType": {
|
|
11380
11745
|
"abstract": true,
|
|
@@ -11391,7 +11756,7 @@
|
|
|
11391
11756
|
"kind": "class",
|
|
11392
11757
|
"locationInModule": {
|
|
11393
11758
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
11394
|
-
"line":
|
|
11759
|
+
"line": 137
|
|
11395
11760
|
},
|
|
11396
11761
|
"name": "NeuronxInstanceType",
|
|
11397
11762
|
"properties": [
|
|
@@ -11404,7 +11769,7 @@
|
|
|
11404
11769
|
"immutable": true,
|
|
11405
11770
|
"locationInModule": {
|
|
11406
11771
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
11407
|
-
"line":
|
|
11772
|
+
"line": 167
|
|
11408
11773
|
},
|
|
11409
11774
|
"name": "INF2_24XLARGE",
|
|
11410
11775
|
"static": true,
|
|
@@ -11421,7 +11786,7 @@
|
|
|
11421
11786
|
"immutable": true,
|
|
11422
11787
|
"locationInModule": {
|
|
11423
11788
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
11424
|
-
"line":
|
|
11789
|
+
"line": 180
|
|
11425
11790
|
},
|
|
11426
11791
|
"name": "INF2_48XLARGE",
|
|
11427
11792
|
"static": true,
|
|
@@ -11438,7 +11803,7 @@
|
|
|
11438
11803
|
"immutable": true,
|
|
11439
11804
|
"locationInModule": {
|
|
11440
11805
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
11441
|
-
"line":
|
|
11806
|
+
"line": 154
|
|
11442
11807
|
},
|
|
11443
11808
|
"name": "INF2_8XLARGE",
|
|
11444
11809
|
"static": true,
|
|
@@ -11455,7 +11820,7 @@
|
|
|
11455
11820
|
"immutable": true,
|
|
11456
11821
|
"locationInModule": {
|
|
11457
11822
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
11458
|
-
"line":
|
|
11823
|
+
"line": 141
|
|
11459
11824
|
},
|
|
11460
11825
|
"name": "INF2_XLARGE",
|
|
11461
11826
|
"static": true,
|
|
@@ -11472,7 +11837,7 @@
|
|
|
11472
11837
|
"immutable": true,
|
|
11473
11838
|
"locationInModule": {
|
|
11474
11839
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
11475
|
-
"line":
|
|
11840
|
+
"line": 193
|
|
11476
11841
|
},
|
|
11477
11842
|
"name": "TRN1_2XLARGE",
|
|
11478
11843
|
"static": true,
|
|
@@ -11489,17 +11854,189 @@
|
|
|
11489
11854
|
"immutable": true,
|
|
11490
11855
|
"locationInModule": {
|
|
11491
11856
|
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
11492
|
-
"line":
|
|
11857
|
+
"line": 206
|
|
11493
11858
|
},
|
|
11494
11859
|
"name": "TRN1_32XLARGE",
|
|
11495
11860
|
"static": true,
|
|
11496
11861
|
"type": {
|
|
11497
11862
|
"fqn": "aws-cdk-neuronx-patterns.INeuronxInstanceType"
|
|
11498
11863
|
}
|
|
11864
|
+
},
|
|
11865
|
+
{
|
|
11866
|
+
"const": true,
|
|
11867
|
+
"docs": {
|
|
11868
|
+
"stability": "stable",
|
|
11869
|
+
"summary": "trn2.3xlarge."
|
|
11870
|
+
},
|
|
11871
|
+
"immutable": true,
|
|
11872
|
+
"locationInModule": {
|
|
11873
|
+
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
11874
|
+
"line": 219
|
|
11875
|
+
},
|
|
11876
|
+
"name": "TRN2_3XLARGE",
|
|
11877
|
+
"static": true,
|
|
11878
|
+
"type": {
|
|
11879
|
+
"fqn": "aws-cdk-neuronx-patterns.INeuronxInstanceType"
|
|
11880
|
+
}
|
|
11881
|
+
},
|
|
11882
|
+
{
|
|
11883
|
+
"const": true,
|
|
11884
|
+
"docs": {
|
|
11885
|
+
"stability": "stable",
|
|
11886
|
+
"summary": "trn2.48xlarge."
|
|
11887
|
+
},
|
|
11888
|
+
"immutable": true,
|
|
11889
|
+
"locationInModule": {
|
|
11890
|
+
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
11891
|
+
"line": 232
|
|
11892
|
+
},
|
|
11893
|
+
"name": "TRN2_48XLARGE",
|
|
11894
|
+
"static": true,
|
|
11895
|
+
"type": {
|
|
11896
|
+
"fqn": "aws-cdk-neuronx-patterns.INeuronxInstanceType"
|
|
11897
|
+
}
|
|
11898
|
+
},
|
|
11899
|
+
{
|
|
11900
|
+
"const": true,
|
|
11901
|
+
"docs": {
|
|
11902
|
+
"stability": "stable",
|
|
11903
|
+
"summary": "trn2u.48xlarge."
|
|
11904
|
+
},
|
|
11905
|
+
"immutable": true,
|
|
11906
|
+
"locationInModule": {
|
|
11907
|
+
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
11908
|
+
"line": 245
|
|
11909
|
+
},
|
|
11910
|
+
"name": "TRN2U_48XLARGE",
|
|
11911
|
+
"static": true,
|
|
11912
|
+
"type": {
|
|
11913
|
+
"fqn": "aws-cdk-neuronx-patterns.INeuronxInstanceType"
|
|
11914
|
+
}
|
|
11499
11915
|
}
|
|
11500
11916
|
],
|
|
11501
11917
|
"symbolId": "src/base/neuronx/neuronx-instance-type:NeuronxInstanceType"
|
|
11502
11918
|
},
|
|
11919
|
+
"aws-cdk-neuronx-patterns.NeuronxNativeCompiler": {
|
|
11920
|
+
"assembly": "aws-cdk-neuronx-patterns",
|
|
11921
|
+
"base": "aws-cdk-neuronx-patterns.NeuronxCompilerBase",
|
|
11922
|
+
"docs": {
|
|
11923
|
+
"remarks": "Compile the model to work with Inferentia2 and Trainium1 and upload it to an S3 bucket.",
|
|
11924
|
+
"stability": "stable",
|
|
11925
|
+
"summary": "Neuronx compiler construct."
|
|
11926
|
+
},
|
|
11927
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxNativeCompiler",
|
|
11928
|
+
"initializer": {
|
|
11929
|
+
"docs": {
|
|
11930
|
+
"stability": "stable"
|
|
11931
|
+
},
|
|
11932
|
+
"locationInModule": {
|
|
11933
|
+
"filename": "src/base/neuronx-compiler/neuronx-native-compiler.ts",
|
|
11934
|
+
"line": 32
|
|
11935
|
+
},
|
|
11936
|
+
"parameters": [
|
|
11937
|
+
{
|
|
11938
|
+
"name": "scope",
|
|
11939
|
+
"type": {
|
|
11940
|
+
"fqn": "constructs.Construct"
|
|
11941
|
+
}
|
|
11942
|
+
},
|
|
11943
|
+
{
|
|
11944
|
+
"name": "id",
|
|
11945
|
+
"type": {
|
|
11946
|
+
"primitive": "string"
|
|
11947
|
+
}
|
|
11948
|
+
},
|
|
11949
|
+
{
|
|
11950
|
+
"name": "props",
|
|
11951
|
+
"type": {
|
|
11952
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxNativeCompilerProps"
|
|
11953
|
+
}
|
|
11954
|
+
}
|
|
11955
|
+
]
|
|
11956
|
+
},
|
|
11957
|
+
"kind": "class",
|
|
11958
|
+
"locationInModule": {
|
|
11959
|
+
"filename": "src/base/neuronx-compiler/neuronx-native-compiler.ts",
|
|
11960
|
+
"line": 31
|
|
11961
|
+
},
|
|
11962
|
+
"methods": [
|
|
11963
|
+
{
|
|
11964
|
+
"docs": {
|
|
11965
|
+
"remarks": "Subclasses must implement this to provide the appropriate compute environment.",
|
|
11966
|
+
"stability": "stable",
|
|
11967
|
+
"summary": "Create the Batch compute environment."
|
|
11968
|
+
},
|
|
11969
|
+
"locationInModule": {
|
|
11970
|
+
"filename": "src/base/neuronx-compiler/neuronx-native-compiler.ts",
|
|
11971
|
+
"line": 36
|
|
11972
|
+
},
|
|
11973
|
+
"name": "createComputeEnvironment",
|
|
11974
|
+
"overrides": "aws-cdk-neuronx-patterns.NeuronxCompilerBase",
|
|
11975
|
+
"parameters": [
|
|
11976
|
+
{
|
|
11977
|
+
"name": "props",
|
|
11978
|
+
"type": {
|
|
11979
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompilerBaseProps"
|
|
11980
|
+
}
|
|
11981
|
+
}
|
|
11982
|
+
],
|
|
11983
|
+
"protected": true,
|
|
11984
|
+
"returns": {
|
|
11985
|
+
"type": {
|
|
11986
|
+
"fqn": "aws-cdk-neuronx-patterns.ComputeEnvironmentResult"
|
|
11987
|
+
}
|
|
11988
|
+
}
|
|
11989
|
+
},
|
|
11990
|
+
{
|
|
11991
|
+
"docs": {
|
|
11992
|
+
"remarks": "Subclasses must implement this to provide the appropriate job definition.",
|
|
11993
|
+
"stability": "stable",
|
|
11994
|
+
"summary": "Create the Batch job definition."
|
|
11995
|
+
},
|
|
11996
|
+
"locationInModule": {
|
|
11997
|
+
"filename": "src/base/neuronx-compiler/neuronx-native-compiler.ts",
|
|
11998
|
+
"line": 80
|
|
11999
|
+
},
|
|
12000
|
+
"name": "createJobDefinition",
|
|
12001
|
+
"overrides": "aws-cdk-neuronx-patterns.NeuronxCompilerBase",
|
|
12002
|
+
"parameters": [
|
|
12003
|
+
{
|
|
12004
|
+
"name": "props",
|
|
12005
|
+
"type": {
|
|
12006
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxCompilerBaseProps"
|
|
12007
|
+
}
|
|
12008
|
+
}
|
|
12009
|
+
],
|
|
12010
|
+
"protected": true,
|
|
12011
|
+
"returns": {
|
|
12012
|
+
"type": {
|
|
12013
|
+
"fqn": "aws-cdk-lib.aws_batch.IJobDefinition"
|
|
12014
|
+
}
|
|
12015
|
+
}
|
|
12016
|
+
}
|
|
12017
|
+
],
|
|
12018
|
+
"name": "NeuronxNativeCompiler",
|
|
12019
|
+
"symbolId": "src/base/neuronx-compiler/neuronx-native-compiler:NeuronxNativeCompiler"
|
|
12020
|
+
},
|
|
12021
|
+
"aws-cdk-neuronx-patterns.NeuronxNativeCompilerProps": {
|
|
12022
|
+
"assembly": "aws-cdk-neuronx-patterns",
|
|
12023
|
+
"datatype": true,
|
|
12024
|
+
"docs": {
|
|
12025
|
+
"stability": "stable",
|
|
12026
|
+
"summary": "Props of NeuronxNativeCompiler."
|
|
12027
|
+
},
|
|
12028
|
+
"fqn": "aws-cdk-neuronx-patterns.NeuronxNativeCompilerProps",
|
|
12029
|
+
"interfaces": [
|
|
12030
|
+
"aws-cdk-neuronx-patterns.NeuronxCompilerBaseProps"
|
|
12031
|
+
],
|
|
12032
|
+
"kind": "interface",
|
|
12033
|
+
"locationInModule": {
|
|
12034
|
+
"filename": "src/base/neuronx-compiler/neuronx-native-compiler.ts",
|
|
12035
|
+
"line": 25
|
|
12036
|
+
},
|
|
12037
|
+
"name": "NeuronxNativeCompilerProps",
|
|
12038
|
+
"symbolId": "src/base/neuronx-compiler/neuronx-native-compiler:NeuronxNativeCompilerProps"
|
|
12039
|
+
},
|
|
11503
12040
|
"aws-cdk-neuronx-patterns.NeuronxTaskDefinition": {
|
|
11504
12041
|
"assembly": "aws-cdk-neuronx-patterns",
|
|
11505
12042
|
"base": "aws-cdk-lib.aws_ecs.Ec2TaskDefinition",
|
|
@@ -13285,6 +13822,87 @@
|
|
|
13285
13822
|
],
|
|
13286
13823
|
"symbolId": "src/base/neuronx/neuronx-instance-type:Trainium1Chips"
|
|
13287
13824
|
},
|
|
13825
|
+
"aws-cdk-neuronx-patterns.Trainium2Chips": {
|
|
13826
|
+
"assembly": "aws-cdk-neuronx-patterns",
|
|
13827
|
+
"docs": {
|
|
13828
|
+
"stability": "stable"
|
|
13829
|
+
},
|
|
13830
|
+
"fqn": "aws-cdk-neuronx-patterns.Trainium2Chips",
|
|
13831
|
+
"initializer": {
|
|
13832
|
+
"docs": {
|
|
13833
|
+
"stability": "stable"
|
|
13834
|
+
},
|
|
13835
|
+
"locationInModule": {
|
|
13836
|
+
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
13837
|
+
"line": 34
|
|
13838
|
+
},
|
|
13839
|
+
"parameters": [
|
|
13840
|
+
{
|
|
13841
|
+
"name": "chips",
|
|
13842
|
+
"type": {
|
|
13843
|
+
"primitive": "number"
|
|
13844
|
+
}
|
|
13845
|
+
}
|
|
13846
|
+
]
|
|
13847
|
+
},
|
|
13848
|
+
"interfaces": [
|
|
13849
|
+
"aws-cdk-neuronx-patterns.IAcceleratorChips"
|
|
13850
|
+
],
|
|
13851
|
+
"kind": "class",
|
|
13852
|
+
"locationInModule": {
|
|
13853
|
+
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
13854
|
+
"line": 31
|
|
13855
|
+
},
|
|
13856
|
+
"name": "Trainium2Chips",
|
|
13857
|
+
"properties": [
|
|
13858
|
+
{
|
|
13859
|
+
"docs": {
|
|
13860
|
+
"stability": "stable"
|
|
13861
|
+
},
|
|
13862
|
+
"immutable": true,
|
|
13863
|
+
"locationInModule": {
|
|
13864
|
+
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
13865
|
+
"line": 33
|
|
13866
|
+
},
|
|
13867
|
+
"name": "acceleratorMemory",
|
|
13868
|
+
"overrides": "aws-cdk-neuronx-patterns.IAcceleratorChips",
|
|
13869
|
+
"type": {
|
|
13870
|
+
"fqn": "aws-cdk-lib.Size"
|
|
13871
|
+
}
|
|
13872
|
+
},
|
|
13873
|
+
{
|
|
13874
|
+
"docs": {
|
|
13875
|
+
"stability": "stable"
|
|
13876
|
+
},
|
|
13877
|
+
"immutable": true,
|
|
13878
|
+
"locationInModule": {
|
|
13879
|
+
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
13880
|
+
"line": 34
|
|
13881
|
+
},
|
|
13882
|
+
"name": "chips",
|
|
13883
|
+
"overrides": "aws-cdk-neuronx-patterns.IAcceleratorChips",
|
|
13884
|
+
"type": {
|
|
13885
|
+
"primitive": "number"
|
|
13886
|
+
}
|
|
13887
|
+
},
|
|
13888
|
+
{
|
|
13889
|
+
"docs": {
|
|
13890
|
+
"stability": "stable"
|
|
13891
|
+
},
|
|
13892
|
+
"immutable": true,
|
|
13893
|
+
"locationInModule": {
|
|
13894
|
+
"filename": "src/base/neuronx/neuronx-instance-type.ts",
|
|
13895
|
+
"line": 32
|
|
13896
|
+
},
|
|
13897
|
+
"name": "neuronxCores",
|
|
13898
|
+
"overrides": "aws-cdk-neuronx-patterns.IAcceleratorChips",
|
|
13899
|
+
"type": {
|
|
13900
|
+
"primitive": "number"
|
|
13901
|
+
}
|
|
13902
|
+
}
|
|
13903
|
+
],
|
|
13904
|
+
"symbolId": "src/base/neuronx/neuronx-instance-type:Trainium2Chips"
|
|
13905
|
+
},
|
|
13288
13906
|
"aws-cdk-neuronx-patterns.UvicornLogLevel": {
|
|
13289
13907
|
"assembly": "aws-cdk-neuronx-patterns",
|
|
13290
13908
|
"docs": {
|
|
@@ -15842,7 +16460,7 @@
|
|
|
15842
16460
|
},
|
|
15843
16461
|
"locationInModule": {
|
|
15844
16462
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
15845
|
-
"line":
|
|
16463
|
+
"line": 76
|
|
15846
16464
|
},
|
|
15847
16465
|
"parameters": [
|
|
15848
16466
|
{
|
|
@@ -15869,7 +16487,7 @@
|
|
|
15869
16487
|
"kind": "class",
|
|
15870
16488
|
"locationInModule": {
|
|
15871
16489
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
15872
|
-
"line":
|
|
16490
|
+
"line": 71
|
|
15873
16491
|
},
|
|
15874
16492
|
"name": "VllmNxdInferenceCompileImage",
|
|
15875
16493
|
"properties": [
|
|
@@ -15881,7 +16499,7 @@
|
|
|
15881
16499
|
"immutable": true,
|
|
15882
16500
|
"locationInModule": {
|
|
15883
16501
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
15884
|
-
"line":
|
|
16502
|
+
"line": 75
|
|
15885
16503
|
},
|
|
15886
16504
|
"name": "image",
|
|
15887
16505
|
"overrides": "aws-cdk-neuronx-patterns.VllmNxdInferenceEcsImageBase",
|
|
@@ -15903,7 +16521,7 @@
|
|
|
15903
16521
|
"kind": "interface",
|
|
15904
16522
|
"locationInModule": {
|
|
15905
16523
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
15906
|
-
"line":
|
|
16524
|
+
"line": 97
|
|
15907
16525
|
},
|
|
15908
16526
|
"name": "VllmNxdInferenceCompileProps",
|
|
15909
16527
|
"properties": [
|
|
@@ -15916,7 +16534,7 @@
|
|
|
15916
16534
|
"immutable": true,
|
|
15917
16535
|
"locationInModule": {
|
|
15918
16536
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
15919
|
-
"line":
|
|
16537
|
+
"line": 105
|
|
15920
16538
|
},
|
|
15921
16539
|
"name": "bucket",
|
|
15922
16540
|
"type": {
|
|
@@ -15932,7 +16550,7 @@
|
|
|
15932
16550
|
"immutable": true,
|
|
15933
16551
|
"locationInModule": {
|
|
15934
16552
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
15935
|
-
"line":
|
|
16553
|
+
"line": 113
|
|
15936
16554
|
},
|
|
15937
16555
|
"name": "model",
|
|
15938
16556
|
"type": {
|
|
@@ -15948,13 +16566,32 @@
|
|
|
15948
16566
|
"immutable": true,
|
|
15949
16567
|
"locationInModule": {
|
|
15950
16568
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
15951
|
-
"line":
|
|
16569
|
+
"line": 101
|
|
15952
16570
|
},
|
|
15953
16571
|
"name": "vpc",
|
|
15954
16572
|
"type": {
|
|
15955
16573
|
"fqn": "aws-cdk-lib.aws_ec2.IVpc"
|
|
15956
16574
|
}
|
|
15957
16575
|
},
|
|
16576
|
+
{
|
|
16577
|
+
"abstract": true,
|
|
16578
|
+
"docs": {
|
|
16579
|
+
"default": "- Automatically selected based on model size",
|
|
16580
|
+
"remarks": "This should be a non-Neuron instance type with sufficient memory for model compilation.",
|
|
16581
|
+
"stability": "stable",
|
|
16582
|
+
"summary": "The EC2 instance type to use for cross-compilation."
|
|
16583
|
+
},
|
|
16584
|
+
"immutable": true,
|
|
16585
|
+
"locationInModule": {
|
|
16586
|
+
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
16587
|
+
"line": 156
|
|
16588
|
+
},
|
|
16589
|
+
"name": "compileInstanceType",
|
|
16590
|
+
"optional": true,
|
|
16591
|
+
"type": {
|
|
16592
|
+
"fqn": "aws-cdk-lib.aws_ec2.InstanceType"
|
|
16593
|
+
}
|
|
16594
|
+
},
|
|
15958
16595
|
{
|
|
15959
16596
|
"abstract": true,
|
|
15960
16597
|
"docs": {
|
|
@@ -15966,7 +16603,7 @@
|
|
|
15966
16603
|
"immutable": true,
|
|
15967
16604
|
"locationInModule": {
|
|
15968
16605
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
15969
|
-
"line":
|
|
16606
|
+
"line": 137
|
|
15970
16607
|
},
|
|
15971
16608
|
"name": "environment",
|
|
15972
16609
|
"optional": true,
|
|
@@ -15989,7 +16626,7 @@
|
|
|
15989
16626
|
"immutable": true,
|
|
15990
16627
|
"locationInModule": {
|
|
15991
16628
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
15992
|
-
"line":
|
|
16629
|
+
"line": 149
|
|
15993
16630
|
},
|
|
15994
16631
|
"name": "image",
|
|
15995
16632
|
"optional": true,
|
|
@@ -16006,7 +16643,7 @@
|
|
|
16006
16643
|
"immutable": true,
|
|
16007
16644
|
"locationInModule": {
|
|
16008
16645
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
16009
|
-
"line":
|
|
16646
|
+
"line": 109
|
|
16010
16647
|
},
|
|
16011
16648
|
"name": "neuronxInstanceType",
|
|
16012
16649
|
"optional": true,
|
|
@@ -16025,7 +16662,7 @@
|
|
|
16025
16662
|
"immutable": true,
|
|
16026
16663
|
"locationInModule": {
|
|
16027
16664
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
16028
|
-
"line":
|
|
16665
|
+
"line": 124
|
|
16029
16666
|
},
|
|
16030
16667
|
"name": "spot",
|
|
16031
16668
|
"optional": true,
|
|
@@ -16043,7 +16680,7 @@
|
|
|
16043
16680
|
"immutable": true,
|
|
16044
16681
|
"locationInModule": {
|
|
16045
16682
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
16046
|
-
"line":
|
|
16683
|
+
"line": 144
|
|
16047
16684
|
},
|
|
16048
16685
|
"name": "vllmArgs",
|
|
16049
16686
|
"optional": true,
|
|
@@ -16061,7 +16698,7 @@
|
|
|
16061
16698
|
"immutable": true,
|
|
16062
16699
|
"locationInModule": {
|
|
16063
16700
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
16064
|
-
"line":
|
|
16701
|
+
"line": 118
|
|
16065
16702
|
},
|
|
16066
16703
|
"name": "volumeSize",
|
|
16067
16704
|
"optional": true,
|
|
@@ -16079,7 +16716,7 @@
|
|
|
16079
16716
|
"immutable": true,
|
|
16080
16717
|
"locationInModule": {
|
|
16081
16718
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
16082
|
-
"line":
|
|
16719
|
+
"line": 130
|
|
16083
16720
|
},
|
|
16084
16721
|
"name": "vpcSubnets",
|
|
16085
16722
|
"optional": true,
|
|
@@ -16104,7 +16741,7 @@
|
|
|
16104
16741
|
"kind": "interface",
|
|
16105
16742
|
"locationInModule": {
|
|
16106
16743
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
16107
|
-
"line":
|
|
16744
|
+
"line": 162
|
|
16108
16745
|
},
|
|
16109
16746
|
"name": "VllmNxdInferenceCompiledModel",
|
|
16110
16747
|
"properties": [
|
|
@@ -16117,7 +16754,7 @@
|
|
|
16117
16754
|
"immutable": true,
|
|
16118
16755
|
"locationInModule": {
|
|
16119
16756
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
16120
|
-
"line":
|
|
16757
|
+
"line": 166
|
|
16121
16758
|
},
|
|
16122
16759
|
"name": "vllmArgs",
|
|
16123
16760
|
"type": {
|
|
@@ -16142,7 +16779,7 @@
|
|
|
16142
16779
|
},
|
|
16143
16780
|
"locationInModule": {
|
|
16144
16781
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
16145
|
-
"line":
|
|
16782
|
+
"line": 176
|
|
16146
16783
|
},
|
|
16147
16784
|
"parameters": [
|
|
16148
16785
|
{
|
|
@@ -16168,7 +16805,7 @@
|
|
|
16168
16805
|
"kind": "class",
|
|
16169
16806
|
"locationInModule": {
|
|
16170
16807
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
16171
|
-
"line":
|
|
16808
|
+
"line": 173
|
|
16172
16809
|
},
|
|
16173
16810
|
"methods": [
|
|
16174
16811
|
{
|
|
@@ -16179,7 +16816,7 @@
|
|
|
16179
16816
|
},
|
|
16180
16817
|
"locationInModule": {
|
|
16181
16818
|
"filename": "src/vllm-nxd-inference/vllm-nxd-inference-compiler.ts",
|
|
16182
|
-
"line":
|
|
16819
|
+
"line": 293
|
|
16183
16820
|
},
|
|
16184
16821
|
"name": "compile",
|
|
16185
16822
|
"returns": {
|
|
@@ -17089,6 +17726,6 @@
|
|
|
17089
17726
|
"symbolId": "src/base/server-engine/vllm-engine/vllm-engine-argments:VllmTask"
|
|
17090
17727
|
}
|
|
17091
17728
|
},
|
|
17092
|
-
"version": "0.
|
|
17093
|
-
"fingerprint": "
|
|
17729
|
+
"version": "0.3.0",
|
|
17730
|
+
"fingerprint": "aK6GSL1zrw/JNphQGwQEHYhOfs7iFqHe5PGaAzykQM0="
|
|
17094
17731
|
}
|