aws-cdk-neuronx-patterns 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/.jsii +604 -99
  2. package/API.md +990 -210
  3. package/README.ja.md +18 -6
  4. package/README.md +16 -5
  5. package/lib/base/aws-batch/neuronx-batch-compute-environment.js +1 -1
  6. package/lib/base/aws-batch/neuronx-batch-ecs-job-definition.js +1 -1
  7. package/lib/base/aws-batch/neuronx-batch.js +1 -1
  8. package/lib/base/aws-ecs-patterns/application-load-balanced-neuronx-service.js +4 -4
  9. package/lib/base/neuronx/deep-learning-containers.js +3 -3
  10. package/lib/base/neuronx/model.js +2 -2
  11. package/lib/base/neuronx/neuron-optimized-machine-image.js +1 -1
  12. package/lib/base/neuronx/neuronx-instance-type.js +4 -4
  13. package/lib/base/neuronx-compiler/index.d.ts +3 -1
  14. package/lib/base/neuronx-compiler/index.js +4 -2
  15. package/lib/base/neuronx-compiler/{neuronx-compiler.d.ts → neuronx-compiler-base.d.ts} +74 -32
  16. package/lib/base/neuronx-compiler/neuronx-compiler-base.js +129 -0
  17. package/lib/base/neuronx-compiler/neuronx-cross-compiler.d.ts +30 -0
  18. package/lib/base/neuronx-compiler/neuronx-cross-compiler.js +83 -0
  19. package/lib/base/neuronx-compiler/neuronx-native-compiler.d.ts +18 -0
  20. package/lib/base/neuronx-compiler/neuronx-native-compiler.js +69 -0
  21. package/lib/base/server-engine/vllm-engine/vllm-engine-argments.js +1 -1
  22. package/lib/sagemaker-inference-toolkit-tnx/sagemaker-inference-toolkit-tnx-compiler.js +2 -2
  23. package/lib/sagemaker-inference-toolkit-tnx/sagemaker-inference-toolkit-tnx-sagemaker.d.ts +1 -1
  24. package/lib/sagemaker-inference-toolkit-tnx/sagemaker-inference-toolkit-tnx-sagemaker.js +2 -2
  25. package/lib/vllm-nxd-inference/vllm-nxd-inference-compiler.d.ts +8 -0
  26. package/lib/vllm-nxd-inference/vllm-nxd-inference-compiler.js +32 -4
  27. package/lib/vllm-nxd-inference/vllm-nxd-inference-ecs-patterns.js +6 -6
  28. package/package.json +5 -5
  29. package/scripts/compile/vllm-nxd-inference/Dockerfile +5 -0
  30. package/scripts/compile/vllm-nxd-inference/entrypoint.sh +39 -14
  31. package/lib/base/neuronx-compiler/neuronx-compiler.js +0 -166
@@ -2,18 +2,12 @@
2
2
 
3
3
  LOG_FILE=~/vllm.log
4
4
  touch $LOG_FILE
5
- wait_for_log_to_be_detected() {
6
- local SEARCH_TEXT="$1"
7
- echo "wait for \"$SEARCH_TEXT\" to be detected in \`$LOG_FILE\`..."
8
-
9
- if grep -q "$SEARCH_TEXT" <(tail -n0 -f "$LOG_FILE"); then
10
- echo "Detected target log. Execute next process."
11
- return 0
12
- else
13
- echo "Compile failed."
14
- return 1
15
- fi
16
- }
5
+
6
+ # Create a dummy neuron device for vllm-neuron plugin registration
7
+ # when running on a non-Neuron instance (cross-compilation)
8
+ if [ ! -e /dev/neuron0 ]; then
9
+ mknod /dev/neuron0 c 10 200 2>/dev/null || true
10
+ fi
17
11
 
18
12
  mkdir compile
19
13
  cd compile
@@ -27,11 +21,42 @@ fi
27
21
  python ~/vllm/quantize.py "$@"
28
22
 
29
23
  vllm serve "$@" 2>&1 | tee $LOG_FILE &
24
+ VLLM_PID=$!
25
+
26
+ # Wait for either successful startup or compilation artifacts to appear
27
+ while true; do
28
+ # Check if vllm serve has started successfully (native Neuron instance)
29
+ if grep -q "Application startup complete" "$LOG_FILE" 2>/dev/null; then
30
+ echo "Detected 'Application startup complete'. Server started successfully."
31
+ break
32
+ fi
33
+
34
+ # Check if compilation artifacts exist but load failed (cross-compilation)
35
+ if grep -q "Cannot find Neuron devices\|Neuron Runtime could not be initialized\|nrt_init" "$LOG_FILE" 2>/dev/null; then
36
+ if [ -d "$NEURON_COMPILED_ARTIFACTS" ] && [ -f "$NEURON_COMPILED_ARTIFACTS/model.pt" ]; then
37
+ echo "Compilation succeeded but Neuron device not available (cross-compilation mode)."
38
+ echo "Artifacts found at $NEURON_COMPILED_ARTIFACTS"
39
+ break
40
+ fi
41
+ fi
42
+
43
+ # Check if vllm process has exited
44
+ if ! kill -0 $VLLM_PID 2>/dev/null; then
45
+ # Process exited - check if artifacts exist
46
+ if [ -d "$NEURON_COMPILED_ARTIFACTS" ] && [ -f "$NEURON_COMPILED_ARTIFACTS/model.pt" ]; then
47
+ echo "vllm serve exited but compilation artifacts found (cross-compilation mode)."
48
+ break
49
+ else
50
+ echo "Compile failed. No artifacts found."
51
+ exit 1
52
+ fi
53
+ fi
30
54
 
31
- wait_for_log_to_be_detected "Application startup complete" || exit 1
55
+ sleep 5
56
+ done
32
57
 
33
58
  aws s3 cp --no-progress --recursive ./ $COMPILED_ARTIFACTS_S3_URI \
34
59
  --exclude "**/.cache/*" \
35
60
  --exclude global_metric_store.json
36
61
 
37
- echo 'Compile completed.'
62
+ echo 'Compile completed.'
@@ -1,166 +0,0 @@
1
- "use strict";
2
- var _a;
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.NeuronxCompiler = void 0;
5
- const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
6
- const aws_cdk_lib_1 = require("aws-cdk-lib");
7
- const batch = require("aws-cdk-lib/aws-batch");
8
- const ec2 = require("aws-cdk-lib/aws-ec2");
9
- const aws_iam_1 = require("aws-cdk-lib/aws-iam");
10
- const aws_lambda_1 = require("aws-cdk-lib/aws-lambda");
11
- const custom_resources_1 = require("aws-cdk-lib/custom-resources");
12
- const constructs_1 = require("constructs");
13
- const path_1 = require("path");
14
- const aws_batch_1 = require("../aws-batch");
15
- const neuronx_1 = require("../neuronx");
16
- /**
17
- * Neuronx compiler construct.
18
- * Compile the model to work with Inferentia2 and Trainium1 and upload it to an S3 bucket.
19
- */
20
- class NeuronxCompiler extends constructs_1.Construct {
21
- constructor(scope, id, props) {
22
- super(scope, id);
23
- const weightSize = aws_cdk_lib_1.Size.gibibytes(props.model.options.parameters.toBillion() * 2.5);
24
- const volumeSize = props.volumeSize?.toGibibytes() ??
25
- Math.ceil(weightSize.toGibibytes() +
26
- neuronx_1.PytorchTrainingNeuronxImage.size.toGibibytes() +
27
- neuronx_1.NeuronOptimizedMachineImage.size.toGibibytes());
28
- const launchTemplate = new ec2.LaunchTemplate(this, "LaunchTemplate", {
29
- blockDevices: [
30
- {
31
- deviceName: "/dev/xvda",
32
- volume: ec2.BlockDeviceVolume.ebs(volumeSize, {
33
- volumeType: ec2.EbsDeviceVolumeType.GP3,
34
- encrypted: true,
35
- }),
36
- },
37
- ],
38
- });
39
- const neuronxInstanceType = props.neuronxInstanceType ?? neuronx_1.NeuronxInstanceType.INF2_48XLARGE;
40
- const computeEnvironment = new aws_batch_1.NeuronxBatchComputeEnvironment(this, "ComputeEnvironment", {
41
- vpc: props.vpc,
42
- vpcSubnets: props.vpcSubnets,
43
- instanceTypes: [neuronxInstanceType.instanceType],
44
- useOptimalInstanceClasses: false,
45
- launchTemplate,
46
- spot: props.spot,
47
- });
48
- aws_cdk_lib_1.Tags.of(computeEnvironment).add("Name", "neuronx-compile-worker");
49
- this.jobQueue = new batch.JobQueue(this, "JobQueue", {
50
- computeEnvironments: [
51
- {
52
- computeEnvironment,
53
- order: 1,
54
- },
55
- ],
56
- jobStateTimeLimitActions: [
57
- {
58
- state: batch.JobStateTimeLimitActionsState.RUNNABLE,
59
- reason: batch.JobStateTimeLimitActionsReason.JOB_RESOURCE_REQUIREMENT,
60
- maxTime: aws_cdk_lib_1.Duration.minutes(10),
61
- action: batch.JobStateTimeLimitActionsAction.CANCEL,
62
- },
63
- ],
64
- });
65
- props.model.bucket?.grantRead(computeEnvironment.instanceRole);
66
- props.bucket.grantReadWrite(computeEnvironment.instanceRole);
67
- this.jobDefinition = new aws_batch_1.NeuronxBatchEcsJobDefinition(this, "JobDefinition", {
68
- neuronxInstanceType,
69
- image: props.image.image,
70
- // The fllowing command was executed on inf2.8xlarge
71
- // sh-5.2$ free -b
72
- // total used free shared buff/cache available
73
- // Mem: 132265766912 866320384 130341785600 667648 1057660928 130529148928
74
- // https://docs.aws.amazon.com/batch/latest/userguide/memory-management.html
75
- memory: aws_cdk_lib_1.Size.mebibytes(Math.ceil(neuronxInstanceType.memory.toMebibytes() * 0.95)),
76
- cpu: neuronxInstanceType.vCpu,
77
- environment: {
78
- NEURON_COMPILE_CACHE_URL: `${props.bucket.s3UrlForObject("neuron-compile-cache")}`,
79
- ...props.environment,
80
- },
81
- command: props.command,
82
- secrets: props.secrets,
83
- });
84
- const jobSubmitFunction = new aws_lambda_1.SingletonFunction(this, "JobSubmitFunction", {
85
- code: aws_lambda_1.Code.fromAsset((0, path_1.join)(__dirname, "private/await-compile-job")),
86
- handler: "index.onEvent",
87
- runtime: aws_lambda_1.Runtime.NODEJS_LATEST,
88
- uuid: "1361f469-5c92-4c46-9e11-5d1dbf925bac",
89
- environment: {
90
- JOB_DEFINITION_ARN: this.jobDefinition.jobDefinitionArn,
91
- JOB_QUEUE_ARN: this.jobQueue.jobQueueArn,
92
- },
93
- });
94
- this.jobDefinition.grantSubmitJob(jobSubmitFunction, this.jobQueue);
95
- const jobMonitoringFunction = new aws_lambda_1.SingletonFunction(this, "JobMonitoringFunction", {
96
- code: aws_lambda_1.Code.fromAsset((0, path_1.join)(__dirname, "private/await-compile-job")),
97
- handler: "index.isComplete",
98
- runtime: aws_lambda_1.Runtime.NODEJS_LATEST,
99
- uuid: "df16dba8-5f77-480c-a6ad-cfdf74c3de62",
100
- environment: {
101
- ARTIFACT_S3_PREFIX: props.artifactS3Prefix,
102
- },
103
- });
104
- aws_iam_1.Grant.addToPrincipal({
105
- resourceArns: ["*"],
106
- grantee: jobMonitoringFunction,
107
- actions: ["batch:DescribeJobs"],
108
- });
109
- const provider = new custom_resources_1.Provider(this, "CompileJobProvider", {
110
- onEventHandler: jobSubmitFunction,
111
- isCompleteHandler: jobMonitoringFunction,
112
- queryInterval: aws_cdk_lib_1.Duration.minutes(1),
113
- totalTimeout: aws_cdk_lib_1.Duration.hours(12),
114
- });
115
- this.entrypoint = new aws_lambda_1.SingletonFunction(this, "JobEntrypointFunction", {
116
- code: aws_lambda_1.Code.fromAsset((0, path_1.join)(__dirname, "private/await-compile-job")),
117
- handler: "index.entrypoint",
118
- environment: {
119
- PROVIDER_ARN: provider.serviceToken,
120
- },
121
- timeout: aws_cdk_lib_1.Duration.minutes(15),
122
- runtime: aws_lambda_1.Runtime.NODEJS_LATEST,
123
- uuid: "f6e66997-5042-4df1-8781-bd68b3ac5313",
124
- });
125
- aws_lambda_1.Function.fromFunctionArn(this, "ProviderFunction", provider.serviceToken).grantInvoke(this.entrypoint);
126
- this.model = props.model;
127
- this.bucket = props.bucket;
128
- this.artifactS3Prefix = props.artifactS3Prefix;
129
- this.weightSize = weightSize;
130
- this.neuronxInstanceType = neuronxInstanceType;
131
- }
132
- compile() {
133
- // when invoke multiple times
134
- if (this.compiledModel) {
135
- return this.compiledModel;
136
- }
137
- const waitConditionHandle = new aws_cdk_lib_1.CfnWaitConditionHandle(this, `WaitConditionHandle${this.artifactS3Prefix}`);
138
- const compileJob = new aws_cdk_lib_1.CustomResource(this, "NeuronxCompile", {
139
- serviceToken: this.entrypoint.functionArn,
140
- resourceType: "Custom::NeuronxCompile",
141
- properties: {
142
- waitConditionCallbackURL: waitConditionHandle.ref,
143
- },
144
- });
145
- const wait = new aws_cdk_lib_1.CfnWaitCondition(this, `WaitCondition${this.artifactS3Prefix}`, {
146
- count: 1,
147
- timeout: aws_cdk_lib_1.Duration.hours(12).toSeconds().toString(),
148
- handle: waitConditionHandle.ref,
149
- });
150
- wait.node.addDependency(compileJob);
151
- const s3Prefix = aws_cdk_lib_1.Fn.select(3, aws_cdk_lib_1.Fn.split('"', wait.attrData.toString()));
152
- this.compiledModel = {
153
- modelName: this.model.modelName,
154
- compileTimeInstanceType: this.neuronxInstanceType,
155
- bucket: this.bucket,
156
- s3Prefix,
157
- s3Uri: this.bucket.s3UrlForObject(s3Prefix),
158
- weightSize: this.weightSize,
159
- };
160
- return this.compiledModel;
161
- }
162
- }
163
- exports.NeuronxCompiler = NeuronxCompiler;
164
- _a = JSII_RTTI_SYMBOL_1;
165
- NeuronxCompiler[_a] = { fqn: "aws-cdk-neuronx-patterns.NeuronxCompiler", version: "0.2.1" };
166
- //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"neuronx-compiler.js","sourceRoot":"","sources":["../../../src/base/neuronx-compiler/neuronx-compiler.ts"],"names":[],"mappings":";;;;;AAAA,6CAQqB;AACrB,+CAA+C;AAC/C,2CAA2C;AAE3C,iDAA4C;AAC5C,uDAKgC;AAEhC,mEAAwD;AACxD,2CAAuC;AACvC,+BAA4B;AAC5B,4CAGsB;AACtB,wCAMoB;AAmGpB;;;GAGG;AACH,MAAa,eAAgB,SAAQ,sBAAS;IAU5C,YAAY,KAAgB,EAAE,EAAU,EAAE,KAA2B;QACnE,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACjB,MAAM,UAAU,GAAG,kBAAI,CAAC,SAAS,CAC/B,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,EAAE,GAAG,GAAG,CACjD,CAAC;QACF,MAAM,UAAU,GACd,KAAK,CAAC,UAAU,EAAE,WAAW,EAAE;YAC/B,IAAI,CAAC,IAAI,CACP,UAAU,CAAC,WAAW,EAAE;gBACtB,qCAA2B,CAAC,IAAI,CAAC,WAAW,EAAE;gBAC9C,qCAA2B,CAAC,IAAI,CAAC,WAAW,EAAE,CACjD,CAAC;QACJ,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,gBAAgB,EAAE;YACpE,YAAY,EAAE;gBACZ;oBACE,UAAU,EAAE,WAAW;oBACvB,MAAM,EAAE,GAAG,CAAC,iBAAiB,CAAC,GAAG,CAAC,UAAU,EAAE;wBAC5C,UAAU,EAAE,GAAG,CAAC,mBAAmB,CAAC,GAAG;wBACvC,SAAS,EAAE,IAAI;qBAChB,CAAC;iBACH;aACF;SACF,CAAC,CAAC;QAEH,MAAM,mBAAmB,GACvB,KAAK,CAAC,mBAAmB,IAAI,6BAAmB,CAAC,aAAa,CAAC;QACjE,MAAM,kBAAkB,GAAG,IAAI,0CAA8B,CAC3D,IAAI,EACJ,oBAAoB,EACpB;YACE,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,aAAa,EAAE,CAAC,mBAAmB,CAAC,YAAY,CAAC;YACjD,yBAAyB,EAAE,KAAK;YAChC,cAAc;YACd,IAAI,EAAE,KAAK,CAAC,IAAI;SACjB,CACF,CAAC;QAEF,kBAAI,CAAC,EAAE,CAAC,kBAAkB,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,wBAAwB,CAAC,CAAC;QAClE,IAAI,CAAC,QAAQ,GAAG,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,EAAE;YACnD,mBAAmB,EAAE;gBACnB;oBACE,kBAAkB;oBAClB,KAAK,EAAE,CAAC;iBACT;aACF;YACD,wBAAwB,EAAE;gBACxB;oBACE,KAAK,EAAE,KAAK,CAAC,6BAA6B,CAAC,QAAQ;oBACnD,MAAM,EAAE,KAAK,CAAC,8BAA8B,CAAC,wBAAwB;oBACrE,OAAO,EAAE,sBAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC7B,MAAM,EAAE,KAAK,CAAC,8BAA8B,CAAC,MAAM;iBACpD;aACF;SACF,CAAC,CAAC;QACH,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,SAAS,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC;QAC/D,KAAK,CAAC,MAAM,CAAC,cAAc,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC;QAC7D,IAAI,CAAC,aAAa,GAAG,IAAI,wCAA4B,CACnD,IAAI,EACJ,eAAe,EACf;YACE,mBAAmB;YACnB,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,KAAK;YACxB,oDAAoD;YACpD,kBAAkB;YAClB,2DAA2D;YAC3D,0EAA0E;YAC1E,4EAA4E;YAC5E,MAAM,EAAE,kBAAI,CAAC,SAAS,CACpB,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,WAAW,EAAE,GAAG,IAAI,CAAC,CAC3D;YACD,GAAG,EAAE,mBAAmB,CAAC,IAAI;YAC7B,WAAW,EAAE;gBACX,wBAAwB,EAAE,GAAG,KAAK,CAAC,MAAM,CAAC,cAAc,CAAC,sBAAsB,CAAC,EAAE;gBAClF,GAAG,KAAK,CAAC,WAAW;aACrB;YACD,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,OAAO,EAAE,KAAK,CAAC,OAAO;SACvB,CACF,CAAC;QAEF,MAAM,iBAAiB,GAAG,IAAI,8BAAiB,CAAC,IAAI,EAAE,mBAAmB,EAAE;YACzE,IAAI,EAAE,iBAAI,CAAC,SAAS,CAAC,IAAA,WAAI,EAAC,SAAS,EAAE,2BAA2B,CAAC,CAAC;YAClE,OAAO,EAAE,eAAe;YACxB,OAAO,EAAE,oBAAO,CAAC,aAAa;YAC9B,IAAI,EAAE,sCAAsC;YAC5C,WAAW,EAAE;gBACX,kBAAkB,EAAE,IAAI,CAAC,aAAa,CAAC,gBAAgB;gBACvD,aAAa,EAAE,IAAI,CAAC,QAAQ,CAAC,WAAW;aACzC;SACF,CAAC,CAAC;QACH,IAAI,CAAC,aAAa,CAAC,cAAc,CAAC,iBAAiB,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QACpE,MAAM,qBAAqB,GAAG,IAAI,8BAAiB,CACjD,IAAI,EACJ,uBAAuB,EACvB;YACE,IAAI,EAAE,iBAAI,CAAC,SAAS,CAAC,IAAA,WAAI,EAAC,SAAS,EAAE,2BAA2B,CAAC,CAAC;YAClE,OAAO,EAAE,kBAAkB;YAC3B,OAAO,EAAE,oBAAO,CAAC,aAAa;YAC9B,IAAI,EAAE,sCAAsC;YAC5C,WAAW,EAAE;gBACX,kBAAkB,EAAE,KAAK,CAAC,gBAAgB;aAC3C;SACF,CACF,CAAC;QACF,eAAK,CAAC,cAAc,CAAC;YACnB,YAAY,EAAE,CAAC,GAAG,CAAC;YACnB,OAAO,EAAE,qBAAqB;YAC9B,OAAO,EAAE,CAAC,oBAAoB,CAAC;SAChC,CAAC,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,2BAAQ,CAAC,IAAI,EAAE,oBAAoB,EAAE;YACxD,cAAc,EAAE,iBAAiB;YACjC,iBAAiB,EAAE,qBAAqB;YACxC,aAAa,EAAE,sBAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;YAClC,YAAY,EAAE,sBAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;SACjC,CAAC,CAAC;QACH,IAAI,CAAC,UAAU,GAAG,IAAI,8BAAiB,CAAC,IAAI,EAAE,uBAAuB,EAAE;YACrE,IAAI,EAAE,iBAAI,CAAC,SAAS,CAAC,IAAA,WAAI,EAAC,SAAS,EAAE,2BAA2B,CAAC,CAAC;YAClE,OAAO,EAAE,kBAAkB;YAC3B,WAAW,EAAE;gBACX,YAAY,EAAE,QAAQ,CAAC,YAAY;aACpC;YACD,OAAO,EAAE,sBAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;YAC7B,OAAO,EAAE,oBAAO,CAAC,aAAa;YAC9B,IAAI,EAAE,sCAAsC;SAC7C,CAAC,CAAC;QACH,qBAAQ,CAAC,eAAe,CACtB,IAAI,EACJ,kBAAkB,EAClB,QAAQ,CAAC,YAAY,CACtB,CAAC,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAE/B,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC;QACzB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC;QAC3B,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC,gBAAgB,CAAC;QAC/C,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,mBAAmB,GAAG,mBAAmB,CAAC;IACjD,CAAC;IACD,OAAO;QACL,6BAA6B;QAC7B,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC,aAAa,CAAC;QAC5B,CAAC;QACD,MAAM,mBAAmB,GAAG,IAAI,oCAAsB,CACpD,IAAI,EACJ,sBAAsB,IAAI,CAAC,gBAAgB,EAAE,CAC9C,CAAC;QACF,MAAM,UAAU,GAAG,IAAI,4BAAc,CAAC,IAAI,EAAE,gBAAgB,EAAE;YAC5D,YAAY,EAAE,IAAI,CAAC,UAAU,CAAC,WAAW;YACzC,YAAY,EAAE,wBAAwB;YACtC,UAAU,EAAE;gBACV,wBAAwB,EAAE,mBAAmB,CAAC,GAAG;aAClD;SACF,CAAC,CAAC;QACH,MAAM,IAAI,GAAG,IAAI,8BAAgB,CAC/B,IAAI,EACJ,gBAAgB,IAAI,CAAC,gBAAgB,EAAE,EACvC;YACE,KAAK,EAAE,CAAC;YACR,OAAO,EAAE,sBAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,SAAS,EAAE,CAAC,QAAQ,EAAE;YAClD,MAAM,EAAE,mBAAmB,CAAC,GAAG;SAChC,CACF,CAAC;QACF,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,CAAC;QACpC,MAAM,QAAQ,GAAG,gBAAE,CAAC,MAAM,CAAC,CAAC,EAAE,gBAAE,CAAC,KAAK,CAAC,GAAG,EAAE,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;QAEvE,IAAI,CAAC,aAAa,GAAG;YACnB,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,SAAS;YAC/B,uBAAuB,EAAE,IAAI,CAAC,mBAAmB;YACjD,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,QAAQ;YACR,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC,QAAQ,CAAC;YAC3C,UAAU,EAAE,IAAI,CAAC,UAAU;SAC5B,CAAC;QACF,OAAO,IAAI,CAAC,aAAa,CAAC;IAC5B,CAAC;;AA1LH,0CA2LC","sourcesContent":["import {\n  CfnWaitCondition,\n  CfnWaitConditionHandle,\n  CustomResource,\n  Duration,\n  Fn,\n  Size,\n  Tags,\n} from \"aws-cdk-lib\";\nimport * as batch from \"aws-cdk-lib/aws-batch\";\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport { ContainerImage } from \"aws-cdk-lib/aws-ecs\";\nimport { Grant } from \"aws-cdk-lib/aws-iam\";\nimport {\n  Code,\n  Function,\n  Runtime,\n  SingletonFunction,\n} from \"aws-cdk-lib/aws-lambda\";\nimport { IBucket } from \"aws-cdk-lib/aws-s3\";\nimport { Provider } from \"aws-cdk-lib/custom-resources\";\nimport { Construct } from \"constructs\";\nimport { join } from \"path\";\nimport {\n  NeuronxBatchComputeEnvironment,\n  NeuronxBatchEcsJobDefinition,\n} from \"../aws-batch\";\nimport {\n  INeuronxInstanceType,\n  Model,\n  NeuronOptimizedMachineImage,\n  NeuronxInstanceType,\n  PytorchTrainingNeuronxImage,\n} from \"../neuronx\";\n\n/**\n * Compile runtime.\n */\nexport interface INeuronxContainerImage {\n  /**\n   * An image of the container where the compile job is executed.\n   */\n  readonly image: ContainerImage;\n  /**\n   * Neuronx version included in container image.\n   */\n  readonly neuronSdkVersion: string;\n}\n\n/**\n * Props of NeuronxCompiler.\n */\nexport interface NeuronxCompilerProps {\n  /**\n   * VPC in which this will launch compile worker instance.\n   */\n  readonly vpc: ec2.IVpc;\n  /**\n   * The bucket to upload compiled artifacts.\n   */\n  readonly bucket: IBucket;\n  /**\n   * Secrets to pass to the container.\n   */\n  readonly secrets?: { [key: string]: batch.Secret };\n  /**\n   * S3 Prefix that compiled artifact uploaded.\n   * This property is not depends on compile job finish.\n   */\n  readonly artifactS3Prefix: string;\n  /**\n   * The instance type of compile worker instance.\n   */\n  readonly neuronxInstanceType: INeuronxInstanceType;\n  /**\n   * The model to be compiled.\n   */\n  readonly model: Model;\n  /**\n   * An image of the container where the compile job is executed.\n   */\n  readonly image: INeuronxContainerImage;\n  readonly command?: string[];\n  /**\n   * The root volume of worker instance.\n   * @default - N bilion parameters * 5GiB EBS\n   */\n  readonly volumeSize?: Size;\n  /**\n   * Whether or not to use spot instances. Spot instances are less expensive EC2 instances that can be reclaimed by EC2 at any time; your job will be given two minutes of notice before reclamation.\n   *\n   * @default false\n   */\n  readonly spot?: boolean;\n  /**\n   * The VPC Subnets this Compute Environment will launch instances in.\n   *\n   * @default - new subnets will be created\n   */\n  readonly vpcSubnets?: ec2.SubnetSelection;\n  /**\n   * The environment variables to pass to the container.\n   * This is only applicable when using container runtime.\n   *\n   * @default - No environment variables.\n   */\n  readonly environment?: {\n    [key: string]: string;\n  };\n}\n\nexport interface NeuronxCompiledModel {\n  readonly compileTimeInstanceType: INeuronxInstanceType;\n  /**\n   * The bucket to upload compiled artifacts.\n   */\n  readonly bucket: IBucket;\n  /**\n   * S3 URL that compiled artifact uploaded.\n   */\n  readonly s3Uri: string;\n  /**\n   * S3 prefix that compiled artifact uploaded.\n   */\n  readonly s3Prefix: string;\n  /**\n   * The model name.\n   */\n  readonly modelName: string;\n  readonly weightSize: Size;\n}\n\n/**\n * Neuronx compiler construct.\n * Compile the model to work with Inferentia2 and Trainium1 and upload it to an S3 bucket.\n */\nexport class NeuronxCompiler extends Construct {\n  private compiledModel?: NeuronxCompiledModel;\n  private readonly entrypoint: SingletonFunction;\n  private readonly jobDefinition: NeuronxBatchEcsJobDefinition;\n  private readonly jobQueue: batch.JobQueue;\n  private readonly artifactS3Prefix: string;\n  private readonly weightSize: Size;\n  private readonly neuronxInstanceType: INeuronxInstanceType;\n  private readonly model: Model;\n  private readonly bucket: IBucket;\n  constructor(scope: Construct, id: string, props: NeuronxCompilerProps) {\n    super(scope, id);\n    const weightSize = Size.gibibytes(\n      props.model.options.parameters.toBillion() * 2.5,\n    );\n    const volumeSize =\n      props.volumeSize?.toGibibytes() ??\n      Math.ceil(\n        weightSize.toGibibytes() +\n          PytorchTrainingNeuronxImage.size.toGibibytes() +\n          NeuronOptimizedMachineImage.size.toGibibytes(),\n      );\n    const launchTemplate = new ec2.LaunchTemplate(this, \"LaunchTemplate\", {\n      blockDevices: [\n        {\n          deviceName: \"/dev/xvda\",\n          volume: ec2.BlockDeviceVolume.ebs(volumeSize, {\n            volumeType: ec2.EbsDeviceVolumeType.GP3,\n            encrypted: true,\n          }),\n        },\n      ],\n    });\n\n    const neuronxInstanceType =\n      props.neuronxInstanceType ?? NeuronxInstanceType.INF2_48XLARGE;\n    const computeEnvironment = new NeuronxBatchComputeEnvironment(\n      this,\n      \"ComputeEnvironment\",\n      {\n        vpc: props.vpc,\n        vpcSubnets: props.vpcSubnets,\n        instanceTypes: [neuronxInstanceType.instanceType],\n        useOptimalInstanceClasses: false,\n        launchTemplate,\n        spot: props.spot,\n      },\n    );\n\n    Tags.of(computeEnvironment).add(\"Name\", \"neuronx-compile-worker\");\n    this.jobQueue = new batch.JobQueue(this, \"JobQueue\", {\n      computeEnvironments: [\n        {\n          computeEnvironment,\n          order: 1,\n        },\n      ],\n      jobStateTimeLimitActions: [\n        {\n          state: batch.JobStateTimeLimitActionsState.RUNNABLE,\n          reason: batch.JobStateTimeLimitActionsReason.JOB_RESOURCE_REQUIREMENT,\n          maxTime: Duration.minutes(10),\n          action: batch.JobStateTimeLimitActionsAction.CANCEL,\n        },\n      ],\n    });\n    props.model.bucket?.grantRead(computeEnvironment.instanceRole);\n    props.bucket.grantReadWrite(computeEnvironment.instanceRole);\n    this.jobDefinition = new NeuronxBatchEcsJobDefinition(\n      this,\n      \"JobDefinition\",\n      {\n        neuronxInstanceType,\n        image: props.image.image,\n        // The fllowing command was executed on inf2.8xlarge\n        // sh-5.2$ free -b\n        // \t\t\ttotal\t\t\t\t\tused\t\t\tfree\t\t\t\t\tshared\tbuff/cache\tavailable\n        // Mem:\t132265766912\t866320384\t130341785600\t667648\t1057660928\t130529148928\n        // https://docs.aws.amazon.com/batch/latest/userguide/memory-management.html\n        memory: Size.mebibytes(\n          Math.ceil(neuronxInstanceType.memory.toMebibytes() * 0.95),\n        ),\n        cpu: neuronxInstanceType.vCpu,\n        environment: {\n          NEURON_COMPILE_CACHE_URL: `${props.bucket.s3UrlForObject(\"neuron-compile-cache\")}`,\n          ...props.environment,\n        },\n        command: props.command,\n        secrets: props.secrets,\n      },\n    );\n\n    const jobSubmitFunction = new SingletonFunction(this, \"JobSubmitFunction\", {\n      code: Code.fromAsset(join(__dirname, \"private/await-compile-job\")),\n      handler: \"index.onEvent\",\n      runtime: Runtime.NODEJS_LATEST,\n      uuid: \"1361f469-5c92-4c46-9e11-5d1dbf925bac\",\n      environment: {\n        JOB_DEFINITION_ARN: this.jobDefinition.jobDefinitionArn,\n        JOB_QUEUE_ARN: this.jobQueue.jobQueueArn,\n      },\n    });\n    this.jobDefinition.grantSubmitJob(jobSubmitFunction, this.jobQueue);\n    const jobMonitoringFunction = new SingletonFunction(\n      this,\n      \"JobMonitoringFunction\",\n      {\n        code: Code.fromAsset(join(__dirname, \"private/await-compile-job\")),\n        handler: \"index.isComplete\",\n        runtime: Runtime.NODEJS_LATEST,\n        uuid: \"df16dba8-5f77-480c-a6ad-cfdf74c3de62\",\n        environment: {\n          ARTIFACT_S3_PREFIX: props.artifactS3Prefix,\n        },\n      },\n    );\n    Grant.addToPrincipal({\n      resourceArns: [\"*\"],\n      grantee: jobMonitoringFunction,\n      actions: [\"batch:DescribeJobs\"],\n    });\n    const provider = new Provider(this, \"CompileJobProvider\", {\n      onEventHandler: jobSubmitFunction,\n      isCompleteHandler: jobMonitoringFunction,\n      queryInterval: Duration.minutes(1),\n      totalTimeout: Duration.hours(12),\n    });\n    this.entrypoint = new SingletonFunction(this, \"JobEntrypointFunction\", {\n      code: Code.fromAsset(join(__dirname, \"private/await-compile-job\")),\n      handler: \"index.entrypoint\",\n      environment: {\n        PROVIDER_ARN: provider.serviceToken,\n      },\n      timeout: Duration.minutes(15),\n      runtime: Runtime.NODEJS_LATEST,\n      uuid: \"f6e66997-5042-4df1-8781-bd68b3ac5313\",\n    });\n    Function.fromFunctionArn(\n      this,\n      \"ProviderFunction\",\n      provider.serviceToken,\n    ).grantInvoke(this.entrypoint);\n\n    this.model = props.model;\n    this.bucket = props.bucket;\n    this.artifactS3Prefix = props.artifactS3Prefix;\n    this.weightSize = weightSize;\n    this.neuronxInstanceType = neuronxInstanceType;\n  }\n  compile() {\n    // when invoke multiple times\n    if (this.compiledModel) {\n      return this.compiledModel;\n    }\n    const waitConditionHandle = new CfnWaitConditionHandle(\n      this,\n      `WaitConditionHandle${this.artifactS3Prefix}`,\n    );\n    const compileJob = new CustomResource(this, \"NeuronxCompile\", {\n      serviceToken: this.entrypoint.functionArn,\n      resourceType: \"Custom::NeuronxCompile\",\n      properties: {\n        waitConditionCallbackURL: waitConditionHandle.ref,\n      },\n    });\n    const wait = new CfnWaitCondition(\n      this,\n      `WaitCondition${this.artifactS3Prefix}`,\n      {\n        count: 1,\n        timeout: Duration.hours(12).toSeconds().toString(),\n        handle: waitConditionHandle.ref,\n      },\n    );\n    wait.node.addDependency(compileJob);\n    const s3Prefix = Fn.select(3, Fn.split('\"', wait.attrData.toString()));\n\n    this.compiledModel = {\n      modelName: this.model.modelName,\n      compileTimeInstanceType: this.neuronxInstanceType,\n      bucket: this.bucket,\n      s3Prefix,\n      s3Uri: this.bucket.s3UrlForObject(s3Prefix),\n      weightSize: this.weightSize,\n    };\n    return this.compiledModel;\n  }\n}\n"]}