aws-cdk-neuronx-patterns 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.jsii +925 -106
- package/API.md +733 -1
- package/README.md +121 -27
- package/docs/neuronx-compile-architecture.png +0 -0
- package/lib/.types-compat/ts3.9/index.d.ts +2 -0
- package/lib/.types-compat/ts3.9/model.d.ts +97 -0
- package/lib/.types-compat/ts3.9/neuronx-compile.d.ts +15 -92
- package/lib/.types-compat/ts3.9/private/util.d.ts +2 -0
- package/lib/.types-compat/ts3.9/transformers-neuronx-sagemaker-realtime-inference.d.ts +113 -0
- package/lib/index.d.ts +2 -0
- package/lib/index.js +3 -1
- package/lib/model.d.ts +97 -0
- package/lib/model.js +93 -0
- package/lib/neuronx-compile.d.ts +15 -92
- package/lib/neuronx-compile.js +43 -156
- package/lib/neuronx-instance-type.js +2 -2
- package/lib/private/await-compile-job/index.js +2 -2
- package/lib/private/util.d.ts +2 -0
- package/lib/private/util.js +31 -0
- package/lib/transformers-neuronx-sagemaker-realtime-inference.d.ts +113 -0
- package/lib/transformers-neuronx-sagemaker-realtime-inference.js +150 -0
- package/package.json +9 -5
- package/scripts/compile/Dockerfile +10 -0
- package/scripts/compile/entrypoint.sh +9 -0
- package/scripts/inference/transformers-neuronx/Dockerfile +1 -0
- package/scripts/inference/transformers-neuronx/code/inference.py +63 -0
- package/scripts/inference/transformers-neuronx/code/requirements.txt +1 -0
- /package/scripts/{compile.py → compile/compile.py} +0 -0
package/lib/model.js
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var _a, _b;
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.Model = exports.Parameters = exports.OptLevel = exports.QuantDtype = void 0;
|
|
5
|
+
const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
|
|
6
|
+
/**
|
|
7
|
+
* Quant data type.
|
|
8
|
+
*/
|
|
9
|
+
var QuantDtype;
|
|
10
|
+
(function (QuantDtype) {
|
|
11
|
+
/**
|
|
12
|
+
* int8 weight storage.
|
|
13
|
+
*/
|
|
14
|
+
QuantDtype["S8"] = "s8";
|
|
15
|
+
})(QuantDtype || (exports.QuantDtype = QuantDtype = {}));
|
|
16
|
+
/**
|
|
17
|
+
* Optimization level.
|
|
18
|
+
*/
|
|
19
|
+
var OptLevel;
|
|
20
|
+
(function (OptLevel) {
|
|
21
|
+
/**
|
|
22
|
+
* enables the core performance optimizations in the compiler, while also minimizing compile time.
|
|
23
|
+
*/
|
|
24
|
+
OptLevel[OptLevel["MINIMIZING_COMPILE_TIME"] = 1] = "MINIMIZING_COMPILE_TIME";
|
|
25
|
+
/**
|
|
26
|
+
* provides the best balance between model performance and compile time.
|
|
27
|
+
*/
|
|
28
|
+
OptLevel[OptLevel["BEST_BALANCE"] = 2] = "BEST_BALANCE";
|
|
29
|
+
/**
|
|
30
|
+
* may provide additional model execution performance but may incur longer compile times and higher host memory usage during model compilation.
|
|
31
|
+
*/
|
|
32
|
+
OptLevel[OptLevel["MODEL_EXECUTION_PERFORMANCE"] = 3] = "MODEL_EXECUTION_PERFORMANCE";
|
|
33
|
+
})(OptLevel || (exports.OptLevel = OptLevel = {}));
|
|
34
|
+
/**
|
|
35
|
+
* Represents the amount of parameters.
|
|
36
|
+
*/
|
|
37
|
+
class Parameters {
|
|
38
|
+
/**
|
|
39
|
+
* Create a Parameters representing an amount bilion.
|
|
40
|
+
* @param parameters number of parameters bilionX
|
|
41
|
+
* @returns parameters
|
|
42
|
+
*/
|
|
43
|
+
static billion(parameters) {
|
|
44
|
+
return new Parameters(parameters);
|
|
45
|
+
}
|
|
46
|
+
constructor(billion) {
|
|
47
|
+
this.billion = billion;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Return this number of parameters as bilion.
|
|
51
|
+
* @returns This number of parameters as bilion.
|
|
52
|
+
*/
|
|
53
|
+
toBilion() {
|
|
54
|
+
return this.billion;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
exports.Parameters = Parameters;
|
|
58
|
+
_a = JSII_RTTI_SYMBOL_1;
|
|
59
|
+
Parameters[_a] = { fqn: "aws-cdk-neuronx-patterns.Parameters", version: "0.0.4" };
|
|
60
|
+
/**
|
|
61
|
+
* Compile target model.
|
|
62
|
+
*/
|
|
63
|
+
class Model {
|
|
64
|
+
/**
|
|
65
|
+
* model informations at HuggingFace
|
|
66
|
+
* @param modelId model id on the HuggingFace
|
|
67
|
+
* @param options model basic infromation
|
|
68
|
+
* @returns model instance
|
|
69
|
+
*/
|
|
70
|
+
static fromHuggingFace(modelId, options) {
|
|
71
|
+
return new Model(modelId, options);
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* model informations at S3 Bucket
|
|
75
|
+
* @param bucket Model stored S3 Bucket
|
|
76
|
+
* @param prefix Model stored objects prefix
|
|
77
|
+
* @param options model basic infromation
|
|
78
|
+
* @returns model instance
|
|
79
|
+
*/
|
|
80
|
+
static fromBucket(bucket, prefix, options) {
|
|
81
|
+
return new Model(bucket.s3UrlForObject(prefix), options, bucket, prefix);
|
|
82
|
+
}
|
|
83
|
+
constructor(modelId, options, bucket, prefix) {
|
|
84
|
+
this.modelId = modelId;
|
|
85
|
+
this.options = options;
|
|
86
|
+
this.bucket = bucket;
|
|
87
|
+
this.prefix = prefix;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
exports.Model = Model;
|
|
91
|
+
_b = JSII_RTTI_SYMBOL_1;
|
|
92
|
+
Model[_b] = { fqn: "aws-cdk-neuronx-patterns.Model", version: "0.0.4" };
|
|
93
|
+
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoibW9kZWwuanMiLCJzb3VyY2VSb290IjoiIiwic291cmNlcyI6WyIuLi9zcmMvbW9kZWwudHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6Ijs7Ozs7QUFFQTs7R0FFRztBQUNILElBQVksVUFLWDtBQUxELFdBQVksVUFBVTtJQUNwQjs7T0FFRztJQUNILHVCQUFTLENBQUE7QUFDWCxDQUFDLEVBTFcsVUFBVSwwQkFBVixVQUFVLFFBS3JCO0FBRUQ7O0dBRUc7QUFDSCxJQUFZLFFBYVg7QUFiRCxXQUFZLFFBQVE7SUFDbEI7O09BRUc7SUFDSCw2RUFBMkIsQ0FBQTtJQUMzQjs7T0FFRztJQUNILHVEQUFnQixDQUFBO0lBQ2hCOztPQUVHO0lBQ0gscUZBQStCLENBQUE7QUFDakMsQ0FBQyxFQWJXLFFBQVEsd0JBQVIsUUFBUSxRQWFuQjtBQXdCRDs7R0FFRztBQUNILE1BQWEsVUFBVTtJQUNyQjs7OztPQUlHO0lBQ0gsTUFBTSxDQUFDLE9BQU8sQ0FBQyxVQUFrQjtRQUMvQixPQUFPLElBQUksVUFBVSxDQUFDLFVBQVUsQ0FBQyxDQUFDO0lBQ3BDLENBQUM7SUFDRCxZQUFxQyxPQUFlO1FBQWYsWUFBTyxHQUFQLE9BQU8sQ0FBUTtJQUFHLENBQUM7SUFDeEQ7OztPQUdHO0lBQ0gsUUFBUTtRQUNOLE9BQU8sSUFBSSxDQUFDLE9BQU8sQ0FBQztJQUN0QixDQUFDOztBQWhCSCxnQ0FpQkM7OztBQVFEOztHQUVHO0FBQ0gsTUFBYSxLQUFLO0lBQ2hCOzs7OztPQUtHO0lBQ0gsTUFBTSxDQUFDLGVBQWUsQ0FBQyxPQUFlLEVBQUUsT0FBcUI7UUFDM0QsT0FBTyxJQUFJLEtBQUssQ0FBQyxPQUFPLEVBQUUsT0FBTyxDQUFDLENBQUM7SUFDckMsQ0FBQztJQUNEOzs7Ozs7T0FNRztJQUNILE1BQU0sQ0FBQyxVQUFVLENBQUMsTUFBZSxFQUFFLE1BQWMsRUFBRSxPQUFxQjtRQUN0RSxPQUFPLElBQUksS0FBSyxDQUFDLE1BQU0sQ0FBQyxjQUFjLENBQUMsTUFBTSxDQUFDLEVBQUUsT0FBTyxFQUFFLE1BQU0sRUFBRSxNQUFNLENBQUMsQ0FBQztJQUMzRSxDQUFDO0lBQ0QsWUFDVyxPQUFlLEVBQ2YsT0FBcUIsRUFDckIsTUFBZ0IsRUFDaEIsTUFBZTtRQUhmLFlBQU8sR0FBUCxPQUFPLENBQVE7UUFDZixZQUFPLEdBQVAsT0FBTyxDQUFjO1FBQ3JCLFdBQU0sR0FBTixNQUFNLENBQVU7UUFDaEIsV0FBTSxHQUFOLE1BQU0sQ0FBUztJQUN2QixDQUFDOztBQXpCTixzQkEwQkMiLCJzb3VyY2VzQ29udGVudCI6WyJpbXBvcnQgeyBJQnVja2V0IH0gZnJvbSBcImF3cy1jZGstbGliL2F3cy1zM1wiO1xuXG4vKipcbiAqIFF1YW50IGRhdGEgdHlwZS5cbiAqL1xuZXhwb3J0IGVudW0gUXVhbnREdHlwZSB7XG4gIC8qKlxuICAgKiBpbnQ4IHdlaWdodCBzdG9yYWdlLlxuICAgKi9cbiAgUzggPSBcInM4XCIsXG59XG5cbi8qKlxuICogT3B0aW1pemF0aW9uIGxldmVsLlxuICovXG5leHBvcnQgZW51bSBPcHRMZXZlbCB7XG4gIC8qKlxuICAgKiBlbmFibGVzIHRoZSBjb3JlIHBlcmZvcm1hbmNlIG9wdGltaXphdGlvbnMgaW4gdGhlIGNvbXBpbGVyLCB3aGlsZSBhbHNvIG1pbmltaX
ppbmcgY29tcGlsZSB0aW1lLlxuICAgKi9cbiAgTUlOSU1JWklOR19DT01QSUxFX1RJTUUgPSAxLFxuICAvKipcbiAgICogcHJvdmlkZXMgdGhlIGJlc3QgYmFsYW5jZSBiZXR3ZWVuIG1vZGVsIHBlcmZvcm1hbmNlIGFuZCBjb21waWxlIHRpbWUuXG4gICAqL1xuICBCRVNUX0JBTEFOQ0UgPSAyLFxuICAvKipcbiAgICogbWF5IHByb3ZpZGUgYWRkaXRpb25hbCBtb2RlbCBleGVjdXRpb24gcGVyZm9ybWFuY2UgYnV0IG1heSBpbmN1ciBsb25nZXIgY29tcGlsZSB0aW1lcyBhbmQgaGlnaGVyIGhvc3QgbWVtb3J5IHVzYWdlIGR1cmluZyBtb2RlbCBjb21waWxhdGlvbi5cbiAgICovXG4gIE1PREVMX0VYRUNVVElPTl9QRVJGT1JNQU5DRSA9IDMsXG59XG5cbi8qKlxuICogQ29tcGlsZSBvcHRpb25zLlxuICovXG5leHBvcnQgaW50ZXJmYWNlIENvbXBpbGVPcHRpb25zIHtcbiAgLyoqXG4gICAqIEBkZWZhdWx0IC0gY2FsYyBmcm9tIHBhcmFtZXRlcnMgYW5kIHF1YW50RHR5cGVcbiAgICovXG4gIHJlYWRvbmx5IHRwRGVncmVlPzogbnVtYmVyO1xuICAvKipcbiAgICogQGRlZmF1bHQgLSBObyBxdWFudFxuICAgKi9cbiAgcmVhZG9ubHkgcXVhbnREdHlwZT86IFF1YW50RHR5cGU7XG4gIC8qKlxuICAgKiBAZGVmYXVsdCA0MDk2XG4gICAqL1xuICByZWFkb25seSBuUG9zaXRpb25zPzogbnVtYmVyO1xuICAvKipcbiAgICogQGRlZmF1bHQgT3B0TGV2ZWwuQkVTVF9CQUxBTkNFXG4gICAqL1xuICByZWFkb25seSBvcHRMZXZlbD86IE9wdExldmVsO1xufVxuXG4vKipcbiAqIFJlcHJlc2VudHMgdGhlIGFtb3VudCBvZiBwYXJhbWV0ZXJzLlxuICovXG5leHBvcnQgY2xhc3MgUGFyYW1ldGVycyB7XG4gIC8qKlxuICAgKiBDcmVhdGUgYSBQYXJhbWV0ZXJzIHJlcHJlc2VudGluZyBhbiBhbW91bnQgYmlsaW9uLlxuICAgKiBAcGFyYW0gcGFyYW1ldGVycyBudW1iZXIgb2YgcGFyYW1ldGVycyBiaWxpb25YXG4gICAqIEByZXR1cm5zIHBhcmFtZXRlcnNcbiAgICovXG4gIHN0YXRpYyBiaWxsaW9uKHBhcmFtZXRlcnM6IG51bWJlcikge1xuICAgIHJldHVybiBuZXcgUGFyYW1ldGVycyhwYXJhbWV0ZXJzKTtcbiAgfVxuICBwcml2YXRlIGNvbnN0cnVjdG9yKHByaXZhdGUgcmVhZG9ubHkgYmlsbGlvbjogbnVtYmVyKSB7fVxuICAvKipcbiAgICogUmV0dXJuIHRoaXMgbnVtYmVyIG9mIHBhcmFtZXRlcnMgYXMgYmlsaW9uLlxuICAgKiBAcmV0dXJucyBUaGlzIG51bWJlciBvZiBwYXJhbWV0ZXJzIGFzIGJpbGlvbi5cbiAgICovXG4gIHRvQmlsaW9uKCkge1xuICAgIHJldHVybiB0aGlzLmJpbGxpb247XG4gIH1cbn1cblxuLyoqXG4gKiBDb21waWxlIHRhcmdldCBtb2RlbCBiYXNpYyBpbmZyb21hdGlvblxuICovXG5leHBvcnQgaW50ZXJmYWNlIE1vZGVsT3B0aW9ucyB7XG4gIHJlYWRvbmx5IHBhcmFtZXRlcnM6IFBhcmFtZXRlcnM7XG59XG4vKipcbiAqIENvbXBpbGUgdGFyZ2V0IG1vZGVsLlxuICovXG5leHBvcnQgY2xhc3MgTW9kZWwge1xuICAvKipcbi
AgICogbW9kZWwgaW5mb3JtYXRpb25zIGF0IEh1Z2dpbmdGYWNlXG4gICAqIEBwYXJhbSBtb2RlbElkIG1vZGVsIGlkIG9uIHRoZSBIdWdnaW5nRmFjZVxuICAgKiBAcGFyYW0gb3B0aW9ucyBtb2RlbCBiYXNpYyBpbmZyb21hdGlvblxuICAgKiBAcmV0dXJucyBtb2RlbCBpbnN0YW5jZVxuICAgKi9cbiAgc3RhdGljIGZyb21IdWdnaW5nRmFjZShtb2RlbElkOiBzdHJpbmcsIG9wdGlvbnM6IE1vZGVsT3B0aW9ucykge1xuICAgIHJldHVybiBuZXcgTW9kZWwobW9kZWxJZCwgb3B0aW9ucyk7XG4gIH1cbiAgLyoqXG4gICAqIG1vZGVsIGluZm9ybWF0aW9ucyBhdCBTMyBCdWNrZXRcbiAgICogQHBhcmFtIGJ1Y2tldCBNb2RlbCBzdG9yZWQgUzMgQnVja2V0XG4gICAqIEBwYXJhbSBwcmVmaXggTW9kZWwgc3RvcmVkIG9iamVjdHMgcHJlZml4XG4gICAqIEBwYXJhbSBvcHRpb25zIG1vZGVsIGJhc2ljIGluZnJvbWF0aW9uXG4gICAqIEByZXR1cm5zIG1vZGVsIGluc3RhbmNlXG4gICAqL1xuICBzdGF0aWMgZnJvbUJ1Y2tldChidWNrZXQ6IElCdWNrZXQsIHByZWZpeDogc3RyaW5nLCBvcHRpb25zOiBNb2RlbE9wdGlvbnMpIHtcbiAgICByZXR1cm4gbmV3IE1vZGVsKGJ1Y2tldC5zM1VybEZvck9iamVjdChwcmVmaXgpLCBvcHRpb25zLCBidWNrZXQsIHByZWZpeCk7XG4gIH1cbiAgcHJpdmF0ZSBjb25zdHJ1Y3RvcihcbiAgICByZWFkb25seSBtb2RlbElkOiBzdHJpbmcsXG4gICAgcmVhZG9ubHkgb3B0aW9uczogTW9kZWxPcHRpb25zLFxuICAgIHJlYWRvbmx5IGJ1Y2tldD86IElCdWNrZXQsXG4gICAgcmVhZG9ubHkgcHJlZml4Pzogc3RyaW5nLFxuICApIHt9XG59XG4iXX0=
|
package/lib/neuronx-compile.d.ts
CHANGED
|
@@ -3,6 +3,7 @@ import * as ec2 from "aws-cdk-lib/aws-ec2";
|
|
|
3
3
|
import { ContainerImage } from "aws-cdk-lib/aws-ecs";
|
|
4
4
|
import { IBucket } from "aws-cdk-lib/aws-s3";
|
|
5
5
|
import { Construct } from "constructs";
|
|
6
|
+
import { CompileOptions, Model, OptLevel, Parameters, QuantDtype } from "./model";
|
|
6
7
|
import { NeuronxInstanceType } from "./neuronx-instance-type";
|
|
7
8
|
/**
|
|
8
9
|
* Compile runtime.
|
|
@@ -17,92 +18,6 @@ export interface CompileRuntime {
|
|
|
17
18
|
*/
|
|
18
19
|
readonly neuronxVersion: string;
|
|
19
20
|
}
|
|
20
|
-
/**
|
|
21
|
-
* Quant data type.
|
|
22
|
-
*/
|
|
23
|
-
export declare enum QuantDtype {
|
|
24
|
-
/**
|
|
25
|
-
* int8 weight storage.
|
|
26
|
-
*/
|
|
27
|
-
S8 = "s8"
|
|
28
|
-
}
|
|
29
|
-
/**
|
|
30
|
-
* Optimization level.
|
|
31
|
-
*/
|
|
32
|
-
export declare enum OptLevel {
|
|
33
|
-
/**
|
|
34
|
-
* enables the core performance optimizations in the compiler, while also minimizing compile time.
|
|
35
|
-
*/
|
|
36
|
-
MINIMIZING_COMPILE_TIME = 1,
|
|
37
|
-
/**
|
|
38
|
-
* provides the best balance between model performance and compile time.
|
|
39
|
-
*/
|
|
40
|
-
BEST_BALANCE = 2,
|
|
41
|
-
/**
|
|
42
|
-
* may provide additional model execution performance but may incur longer compile times and higher host memory usage during model compilation.
|
|
43
|
-
*/
|
|
44
|
-
MODEL_EXECUTION_PERFORMANCE = 3
|
|
45
|
-
}
|
|
46
|
-
/**
|
|
47
|
-
* Compile options.
|
|
48
|
-
*/
|
|
49
|
-
export interface CompileOptions {
|
|
50
|
-
/**
|
|
51
|
-
* @default - calc from parameters and quantDtype
|
|
52
|
-
*/
|
|
53
|
-
readonly tpDegree?: number;
|
|
54
|
-
/**
|
|
55
|
-
* @default - No quant
|
|
56
|
-
*/
|
|
57
|
-
readonly quantDtype?: QuantDtype;
|
|
58
|
-
/**
|
|
59
|
-
* @default 4092
|
|
60
|
-
*/
|
|
61
|
-
readonly nPositions?: number;
|
|
62
|
-
/**
|
|
63
|
-
* @default OptLevel.BEST_BALANCE
|
|
64
|
-
*/
|
|
65
|
-
readonly optLevel?: OptLevel;
|
|
66
|
-
}
|
|
67
|
-
/**
|
|
68
|
-
* Represents the amount of parameters.
|
|
69
|
-
*/
|
|
70
|
-
export declare class Parameters {
|
|
71
|
-
private readonly billion;
|
|
72
|
-
/**
|
|
73
|
-
* Create a Parameters representing an amount bilion.
|
|
74
|
-
* @param parameters number of parameters bilionX
|
|
75
|
-
* @returns parameters
|
|
76
|
-
*/
|
|
77
|
-
static billion(parameters: number): Parameters;
|
|
78
|
-
private constructor();
|
|
79
|
-
/**
|
|
80
|
-
* Return this number of parameters as bilion.
|
|
81
|
-
* @returns This number of parameters as bilion.
|
|
82
|
-
*/
|
|
83
|
-
toBilion(): number;
|
|
84
|
-
}
|
|
85
|
-
/**
|
|
86
|
-
* Compile target model basic infromation
|
|
87
|
-
*/
|
|
88
|
-
export interface ModelOptions {
|
|
89
|
-
readonly parameters: Parameters;
|
|
90
|
-
}
|
|
91
|
-
/**
|
|
92
|
-
* Compile target model.
|
|
93
|
-
*/
|
|
94
|
-
export declare class Model {
|
|
95
|
-
readonly modelId: string;
|
|
96
|
-
readonly options: ModelOptions;
|
|
97
|
-
/**
|
|
98
|
-
* model informations at HuggingFace
|
|
99
|
-
* @param modelId model id on the HuggingFace
|
|
100
|
-
* @param options model basic infromation
|
|
101
|
-
* @returns model instance
|
|
102
|
-
*/
|
|
103
|
-
static fromHuggingFace(modelId: string, options: ModelOptions): Model;
|
|
104
|
-
private constructor();
|
|
105
|
-
}
|
|
106
21
|
/**
|
|
107
22
|
* Props of NeuronxCompile.
|
|
108
23
|
*/
|
|
@@ -111,10 +26,6 @@ export interface NeuronxCompileProps {
|
|
|
111
26
|
* VPC in which this will launch compile worker instance.
|
|
112
27
|
*/
|
|
113
28
|
readonly vpc: ec2.IVpc;
|
|
114
|
-
/**
|
|
115
|
-
* The instance type of compile worker instance.
|
|
116
|
-
*/
|
|
117
|
-
readonly instanceType?: NeuronxInstanceType;
|
|
118
29
|
/**
|
|
119
30
|
* The bucket to upload compiled artifacts.
|
|
120
31
|
*/
|
|
@@ -123,6 +34,10 @@ export interface NeuronxCompileProps {
|
|
|
123
34
|
* The model to be compiled.
|
|
124
35
|
*/
|
|
125
36
|
readonly model: Model;
|
|
37
|
+
/**
|
|
38
|
+
* The instance type of compile worker instance.
|
|
39
|
+
*/
|
|
40
|
+
readonly instanceType?: NeuronxInstanceType;
|
|
126
41
|
/**
|
|
127
42
|
* The root volume of worker instance.
|
|
128
43
|
* @default - N bilion parameters * 5GiB EBS
|
|
@@ -155,12 +70,20 @@ export interface NeuronxCompileProps {
|
|
|
155
70
|
* Neuronx compile construct. Compile the model to work with Inferentia2 and Trainium1 and upload it to an S3 bucket.
|
|
156
71
|
*/
|
|
157
72
|
export declare class NeuronxCompile extends Construct {
|
|
73
|
+
readonly compiledArtifactS3Bucket: IBucket;
|
|
158
74
|
/**
|
|
159
75
|
* S3 URL that compiled artifact uploaded.
|
|
160
76
|
*/
|
|
161
77
|
readonly compiledArtifactS3Url: string;
|
|
78
|
+
/**
|
|
79
|
+
* S3 Prefix that compiled artifact uploaded.
|
|
80
|
+
*/
|
|
81
|
+
readonly compiledArtifactS3Prefix: string;
|
|
82
|
+
readonly tpDegree: number;
|
|
83
|
+
readonly quantDtype?: QuantDtype;
|
|
84
|
+
readonly nPositions: number;
|
|
85
|
+
readonly optLevel: OptLevel;
|
|
86
|
+
readonly parameters: Parameters;
|
|
162
87
|
constructor(scope: Construct, id: string, props: NeuronxCompileProps);
|
|
163
|
-
private connectAcceleratorChips;
|
|
164
|
-
private calcTpDegree;
|
|
165
88
|
private selectInstanceTypeByTpDegree;
|
|
166
89
|
}
|
package/lib/neuronx-compile.js
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var _a
|
|
2
|
+
var _a;
|
|
3
3
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
-
exports.NeuronxCompile =
|
|
4
|
+
exports.NeuronxCompile = void 0;
|
|
5
5
|
const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
|
|
6
|
-
const fs_1 = require("fs");
|
|
7
6
|
const path_1 = require("path");
|
|
8
7
|
const aws_cdk_lib_1 = require("aws-cdk-lib");
|
|
9
8
|
const batch = require("aws-cdk-lib/aws-batch");
|
|
@@ -13,98 +12,28 @@ const aws_iam_1 = require("aws-cdk-lib/aws-iam");
|
|
|
13
12
|
const aws_lambda_1 = require("aws-cdk-lib/aws-lambda");
|
|
14
13
|
const custom_resources_1 = require("aws-cdk-lib/custom-resources");
|
|
15
14
|
const constructs_1 = require("constructs");
|
|
15
|
+
const model_1 = require("./model");
|
|
16
16
|
const neuronx_instance_type_1 = require("./neuronx-instance-type");
|
|
17
17
|
const neuron_optimized_machine_image_1 = require("./private/neuron-optimized-machine-image");
|
|
18
|
-
|
|
19
|
-
* Quant data type.
|
|
20
|
-
*/
|
|
21
|
-
var QuantDtype;
|
|
22
|
-
(function (QuantDtype) {
|
|
23
|
-
/**
|
|
24
|
-
* int8 weight storage.
|
|
25
|
-
*/
|
|
26
|
-
QuantDtype["S8"] = "s8";
|
|
27
|
-
})(QuantDtype || (exports.QuantDtype = QuantDtype = {}));
|
|
28
|
-
/**
|
|
29
|
-
* Optimization level.
|
|
30
|
-
*/
|
|
31
|
-
var OptLevel;
|
|
32
|
-
(function (OptLevel) {
|
|
33
|
-
/**
|
|
34
|
-
* enables the core performance optimizations in the compiler, while also minimizing compile time.
|
|
35
|
-
*/
|
|
36
|
-
OptLevel[OptLevel["MINIMIZING_COMPILE_TIME"] = 1] = "MINIMIZING_COMPILE_TIME";
|
|
37
|
-
/**
|
|
38
|
-
* provides the best balance between model performance and compile time.
|
|
39
|
-
*/
|
|
40
|
-
OptLevel[OptLevel["BEST_BALANCE"] = 2] = "BEST_BALANCE";
|
|
41
|
-
/**
|
|
42
|
-
* may provide additional model execution performance but may incur longer compile times and higher host memory usage during model compilation.
|
|
43
|
-
*/
|
|
44
|
-
OptLevel[OptLevel["MODEL_EXECUTION_PERFORMANCE"] = 3] = "MODEL_EXECUTION_PERFORMANCE";
|
|
45
|
-
})(OptLevel || (exports.OptLevel = OptLevel = {}));
|
|
46
|
-
/**
|
|
47
|
-
* Represents the amount of parameters.
|
|
48
|
-
*/
|
|
49
|
-
class Parameters {
|
|
50
|
-
/**
|
|
51
|
-
* Create a Parameters representing an amount bilion.
|
|
52
|
-
* @param parameters number of parameters bilionX
|
|
53
|
-
* @returns parameters
|
|
54
|
-
*/
|
|
55
|
-
static billion(parameters) {
|
|
56
|
-
return new Parameters(parameters);
|
|
57
|
-
}
|
|
58
|
-
constructor(billion) {
|
|
59
|
-
this.billion = billion;
|
|
60
|
-
}
|
|
61
|
-
/**
|
|
62
|
-
* Return this number of parameters as bilion.
|
|
63
|
-
* @returns This number of parameters as bilion.
|
|
64
|
-
*/
|
|
65
|
-
toBilion() {
|
|
66
|
-
return this.billion;
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
exports.Parameters = Parameters;
|
|
70
|
-
_a = JSII_RTTI_SYMBOL_1;
|
|
71
|
-
Parameters[_a] = { fqn: "aws-cdk-neuronx-patterns.Parameters", version: "0.0.2" };
|
|
72
|
-
/**
|
|
73
|
-
* Compile target model.
|
|
74
|
-
*/
|
|
75
|
-
class Model {
|
|
76
|
-
/**
|
|
77
|
-
* model informations at HuggingFace
|
|
78
|
-
* @param modelId model id on the HuggingFace
|
|
79
|
-
* @param options model basic infromation
|
|
80
|
-
* @returns model instance
|
|
81
|
-
*/
|
|
82
|
-
static fromHuggingFace(modelId, options) {
|
|
83
|
-
return new Model(modelId, options);
|
|
84
|
-
}
|
|
85
|
-
constructor(modelId, options) {
|
|
86
|
-
this.modelId = modelId;
|
|
87
|
-
this.options = options;
|
|
88
|
-
}
|
|
89
|
-
}
|
|
90
|
-
exports.Model = Model;
|
|
91
|
-
_b = JSII_RTTI_SYMBOL_1;
|
|
92
|
-
Model[_b] = { fqn: "aws-cdk-neuronx-patterns.Model", version: "0.0.2" };
|
|
18
|
+
const util_1 = require("./private/util");
|
|
93
19
|
/**
|
|
94
20
|
* Neuronx compile construct. Compile the model to work with Inferentia2 and Trainium1 and upload it to an S3 bucket.
|
|
95
21
|
*/
|
|
96
22
|
class NeuronxCompile extends constructs_1.Construct {
|
|
97
23
|
constructor(scope, id, props) {
|
|
98
24
|
super(scope, id);
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
25
|
+
this.parameters = props.model.options.parameters;
|
|
26
|
+
this.compiledArtifactS3Bucket = props.bucket;
|
|
27
|
+
this.nPositions = props.compileOptions?.nPositions ?? 4096;
|
|
28
|
+
this.quantDtype = props.compileOptions?.quantDtype;
|
|
29
|
+
this.optLevel = props.compileOptions?.optLevel ?? model_1.OptLevel.BEST_BALANCE;
|
|
30
|
+
this.tpDegree =
|
|
31
|
+
props.compileOptions?.tpDegree ??
|
|
32
|
+
(0, util_1.calcTpDegree)(props.model.options.parameters, {
|
|
33
|
+
nPositions: this.nPositions,
|
|
34
|
+
quantDtype: this.quantDtype,
|
|
35
|
+
});
|
|
36
|
+
const instanceType = props.instanceType ?? this.selectInstanceTypeByTpDegree(this.tpDegree);
|
|
108
37
|
const launchTemplate = new ec2.LaunchTemplate(this, "LaunchTemplate", {
|
|
109
38
|
blockDevices: [
|
|
110
39
|
{
|
|
@@ -148,16 +77,25 @@ class NeuronxCompile extends constructs_1.Construct {
|
|
|
148
77
|
],
|
|
149
78
|
});
|
|
150
79
|
const runtime = props.runtime ?? {
|
|
151
|
-
image: aws_ecs_1.ContainerImage.
|
|
152
|
-
neuronxVersion: "2.19.
|
|
153
|
-
neuronxTransformersVersion: "0.11.351",
|
|
80
|
+
image: aws_ecs_1.ContainerImage.fromAsset((0, path_1.join)(__dirname, "../scripts/compile")),
|
|
81
|
+
neuronxVersion: "2.19.1",
|
|
154
82
|
};
|
|
155
|
-
let compiledArtifactPathPrefix = `${props.model.modelId}/neuronx-${runtime.neuronxVersion}/tp${tpDegree}-np${nPositions}-opt${optLevel}`;
|
|
156
|
-
if (quantDtype) {
|
|
157
|
-
compiledArtifactPathPrefix = `${compiledArtifactPathPrefix}-quant${quantDtype}`;
|
|
83
|
+
let compiledArtifactPathPrefix = `${props.model.modelId}/neuronx-${runtime.neuronxVersion}/tp${this.tpDegree}-np${this.nPositions}-opt${this.optLevel}`;
|
|
84
|
+
if (this.quantDtype) {
|
|
85
|
+
compiledArtifactPathPrefix = `${compiledArtifactPathPrefix}-quant${this.quantDtype}`;
|
|
158
86
|
}
|
|
159
87
|
props.bucket.grantReadWrite(computeEnvironment.instanceRole, `${compiledArtifactPathPrefix}/*`);
|
|
160
|
-
const
|
|
88
|
+
const linuxParameters = new batch.LinuxParameters(this, "LinuxParameters");
|
|
89
|
+
linuxParameters.addDevices(...Array.from({
|
|
90
|
+
length: instanceType.acceleratorChips.chips,
|
|
91
|
+
}).map((_, index) => ({
|
|
92
|
+
hostPath: `/dev/neuron${index}`,
|
|
93
|
+
containerPath: `/dev/neuron${index}`,
|
|
94
|
+
permissions: [
|
|
95
|
+
batch.DevicePermission.READ,
|
|
96
|
+
batch.DevicePermission.WRITE,
|
|
97
|
+
],
|
|
98
|
+
})));
|
|
161
99
|
const jobDefinition = new batch.EcsJobDefinition(this, "JobDefinition", {
|
|
162
100
|
container: new batch.EcsEc2ContainerDefinition(this, "ContainerDefinition", {
|
|
163
101
|
image: runtime.image,
|
|
@@ -168,37 +106,17 @@ class NeuronxCompile extends constructs_1.Construct {
|
|
|
168
106
|
// https://docs.aws.amazon.com/batch/latest/userguide/memory-management.html
|
|
169
107
|
memory: aws_cdk_lib_1.Size.mebibytes(Math.ceil(instanceType.memory.toMebibytes() * 0.95)),
|
|
170
108
|
cpu: instanceType.vCpu,
|
|
171
|
-
command: [
|
|
172
|
-
`cat <<EOF > compile.py\n${compileScript}\nEOF\n`,
|
|
173
|
-
[
|
|
174
|
-
runtime.neuronxTransformersVersion
|
|
175
|
-
? "pip install -U --extra-index-url https://pip.repos.neuron.amazonaws.com transformers-neuronx==$NEURONX_TRANSFORMERS_VERSION"
|
|
176
|
-
: undefined,
|
|
177
|
-
"curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash",
|
|
178
|
-
"apt-get install git-lfs",
|
|
179
|
-
"git lfs install",
|
|
180
|
-
`git clone https://huggingface.co/${props.model.modelId} model`,
|
|
181
|
-
"rm -rf model/.git",
|
|
182
|
-
"python ./compile.py",
|
|
183
|
-
`aws s3 sync --no-progress ./model ${props.bucket.s3UrlForObject(`${compiledArtifactPathPrefix}/model`)}`,
|
|
184
|
-
`aws s3 sync --no-progress ./compiled ${props.bucket.s3UrlForObject(`${compiledArtifactPathPrefix}/compiled`)}`,
|
|
185
|
-
"echo 'compile completed'",
|
|
186
|
-
]
|
|
187
|
-
.filter((v) => !!v)
|
|
188
|
-
.join(" && "),
|
|
189
|
-
],
|
|
190
109
|
environment: {
|
|
191
110
|
MODEL_ID: props.model.modelId,
|
|
192
|
-
TP_DEGREE: tpDegree.toString(),
|
|
193
|
-
N_POSITIONS: nPositions.toString(),
|
|
194
|
-
OPT_LEVEL: optLevel.toString(),
|
|
195
|
-
QUANT_DTYPE: quantDtype?.toString() ?? "",
|
|
111
|
+
TP_DEGREE: this.tpDegree.toString(),
|
|
112
|
+
N_POSITIONS: this.nPositions.toString(),
|
|
113
|
+
OPT_LEVEL: this.optLevel.toString(),
|
|
114
|
+
QUANT_DTYPE: this.quantDtype?.toString() ?? "",
|
|
196
115
|
ARTIFACT_S3_URL: props.bucket.s3UrlForObject(compiledArtifactPathPrefix),
|
|
197
|
-
NEURONX_TRANSFORMERS_VERSION: runtime.neuronxTransformersVersion ?? "",
|
|
198
116
|
},
|
|
117
|
+
linuxParameters,
|
|
199
118
|
}),
|
|
200
119
|
});
|
|
201
|
-
this.connectAcceleratorChips(jobDefinition, instanceType);
|
|
202
120
|
const jobSubmitFunction = new aws_lambda_1.SingletonFunction(this, "JobSubmitFunction", {
|
|
203
121
|
code: aws_lambda_1.Code.fromAsset((0, path_1.join)(__dirname, "private/await-compile-job")),
|
|
204
122
|
handler: "index.onEvent",
|
|
@@ -229,42 +147,11 @@ class NeuronxCompile extends constructs_1.Construct {
|
|
|
229
147
|
properties: {
|
|
230
148
|
jobDefinitionArn: jobDefinition.jobDefinitionArn,
|
|
231
149
|
jobQueueArn: jobQueue.jobQueueArn,
|
|
232
|
-
|
|
150
|
+
artifactS3Prefix: compiledArtifactPathPrefix,
|
|
233
151
|
},
|
|
234
152
|
});
|
|
235
|
-
this.
|
|
236
|
-
|
|
237
|
-
connectAcceleratorChips(jobDefinition, instanceType) {
|
|
238
|
-
const devices = Array.from({
|
|
239
|
-
length: instanceType.acceleratorChips.chips,
|
|
240
|
-
}).map((_, index) => ({
|
|
241
|
-
HostPath: `/dev/neuron${index}`,
|
|
242
|
-
ContainerPath: `/dev/neuron${index}`,
|
|
243
|
-
Permissions: ["read", "write"],
|
|
244
|
-
}));
|
|
245
|
-
const cfnJobDefinition = jobDefinition.node
|
|
246
|
-
.defaultChild;
|
|
247
|
-
cfnJobDefinition.addPropertyOverride("ContainerProperties.LinuxParameters.Devices", devices);
|
|
248
|
-
}
|
|
249
|
-
calcTpDegree(parameters, compileOptions) {
|
|
250
|
-
// case of float16
|
|
251
|
-
const bytesPerParamete = 16 / 8;
|
|
252
|
-
// memory = bytes per parameter * number of parameters
|
|
253
|
-
let memory = aws_cdk_lib_1.Size.gibibytes(bytesPerParamete * parameters.toBilion());
|
|
254
|
-
switch (compileOptions.quantDtype) {
|
|
255
|
-
case QuantDtype.S8:
|
|
256
|
-
memory = aws_cdk_lib_1.Size.gibibytes(memory.toGibibytes() / 2);
|
|
257
|
-
break;
|
|
258
|
-
}
|
|
259
|
-
const neronxCoreMemory = aws_cdk_lib_1.Size.gibibytes(16);
|
|
260
|
-
const minimum = Math.ceil(memory.toGibibytes() / neronxCoreMemory.toGibibytes());
|
|
261
|
-
const tpDegrees = [1, 2, 4, 8, 24];
|
|
262
|
-
for (const tpDegree of tpDegrees) {
|
|
263
|
-
if (minimum <= tpDegree) {
|
|
264
|
-
return tpDegree;
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
throw new Error("This model is too large, I can not support this model current version.");
|
|
153
|
+
this.compiledArtifactS3Prefix = compileJob.getAttString("ArtifactS3Prefix");
|
|
154
|
+
this.compiledArtifactS3Url = props.bucket.s3UrlForObject(this.compiledArtifactS3Prefix);
|
|
268
155
|
}
|
|
269
156
|
selectInstanceTypeByTpDegree(tpDegree) {
|
|
270
157
|
const instanceTypes = [
|
|
@@ -281,6 +168,6 @@ class NeuronxCompile extends constructs_1.Construct {
|
|
|
281
168
|
}
|
|
282
169
|
}
|
|
283
170
|
exports.NeuronxCompile = NeuronxCompile;
|
|
284
|
-
|
|
285
|
-
NeuronxCompile[
|
|
286
|
-
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"neuronx-compile.js","sourceRoot":"","sources":["../src/neuronx-compile.ts"],"names":[],"mappings":";;;;;AAAA,2BAAkC;AAClC,+BAA4B;AAC5B,6CAAmE;AACnE,+CAA+C;AAC/C,2CAA2C;AAC3C,iDAAqD;AACrD,iDAA4C;AAC5C,uDAA0E;AAE1E,mEAAwD;AACxD,2CAAuC;AACvC,mEAA8D;AAC9D,6FAAuF;AAgBvF;;GAEG;AACH,IAAY,UAKX;AALD,WAAY,UAAU;IACpB;;OAEG;IACH,uBAAS,CAAA;AACX,CAAC,EALW,UAAU,0BAAV,UAAU,QAKrB;AAED;;GAEG;AACH,IAAY,QAaX;AAbD,WAAY,QAAQ;IAClB;;OAEG;IACH,6EAA2B,CAAA;IAC3B;;OAEG;IACH,uDAAgB,CAAA;IAChB;;OAEG;IACH,qFAA+B,CAAA;AACjC,CAAC,EAbW,QAAQ,wBAAR,QAAQ,QAanB;AAwBD;;GAEG;AACH,MAAa,UAAU;IACrB;;;;OAIG;IACH,MAAM,CAAC,OAAO,CAAC,UAAkB;QAC/B,OAAO,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC;IACpC,CAAC;IACD,YAAqC,OAAe;QAAf,YAAO,GAAP,OAAO,CAAQ;IAAG,CAAC;IACxD;;;OAGG;IACH,QAAQ;QACN,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;;AAhBH,gCAiBC;;;AAQD;;GAEG;AACH,MAAa,KAAK;IAChB;;;;;OAKG;IACH,MAAM,CAAC,eAAe,CAAC,OAAe,EAAE,OAAqB;QAC3D,OAAO,IAAI,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IACrC,CAAC;IACD,YACW,OAAe,EACf,OAAqB;QADrB,YAAO,GAAP,OAAO,CAAQ;QACf,YAAO,GAAP,OAAO,CAAc;IAC7B,CAAC;;AAbN,sBAcC;;;AAkDD;;GAEG;AACH,MAAa,cAAe,SAAQ,sBAAS;IAK3C,YAAY,KAAgB,EAAE,EAAU,EAAE,KAA0B;QAClE,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAEjB,MAAM,UAAU,GAAG,KAAK,CAAC,cAAc,EAAE,UAAU,IAAI,IAAI,CAAC;QAC5D,MAAM,UAAU,GAAG,KAAK,CAAC,cAAc,EAAE,UAAU,CAAC;QACpD,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,EAAE,QAAQ,IAAI,QAAQ,CAAC,YAAY,CAAC;QACzE,MAAM,QAAQ,GACZ,KAAK,CAAC,cAAc,EAAE,QAAQ;YAC9B,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE;gBAChD,UAAU;gBACV,UAAU;aACX,CAAC,CAAC;QACL,MAAM,YAAY,GAChB,KAAK,CAAC,YAAY,IAAI,IAAI,CAAC,4BAA4B,CAAC,QAAQ,CAAC,CAAC;QACpE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,gBAAgB,EAAE;YACpE,YAAY,EAAE;gBACZ;oBACE,UAAU,EAAE,WAAW;oBACvB,MAAM,EAAE,GAAG,CAAC,iBAAiB,CAAC,GAAG,CAC/B,KAAK,CAAC,UAAU,EAAE,WAAW,EAAE;wBAC7B,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC,CAChD;iBACF;aACF;SACF,CAAC,CAAC;QACH,MAAM,kBAAkB,GAAG,IAAI,KAAK,CAAC,+BAA+B,CAClE,IAAI,EACJ,oBAAoB,EACpB;YACE
,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,aAAa,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC;YAC1C,yBAAyB,EAAE,KAAK;YAChC,MAAM,EAAE;gBACN;oBACE,KAAK,EAAE,IAAI,4DAA2B,CAAC,IAAI,EAAE,aAAa,CAAC;oBAC3D,aAAa;oBACb,SAAS,EAAE,YAAY;iBACxB;aACF;YACD,cAAc;YACd,IAAI,EAAE,KAAK,CAAC,IAAI;SACjB,CACF,CAAC;QAEA,kBAAkB,CAAC,IAAI,CAAC,YACzB,CAAC,mBAAmB,CACnB,yCAAyC,EACzC,cAAc,CAAC,mBAAmB,CACnC,CAAC;QAEF,kBAAI,CAAC,EAAE,CAAC,kBAAkB,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,wBAAwB,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,EAAE;YACpD,mBAAmB,EAAE;gBACnB;oBACE,kBAAkB;oBAClB,KAAK,EAAE,CAAC;iBACT;aACF;YACD,wBAAwB,EAAE;gBACxB;oBACE,KAAK,EAAE,KAAK,CAAC,6BAA6B,CAAC,QAAQ;oBACnD,MAAM,EAAE,KAAK,CAAC,8BAA8B,CAAC,wBAAwB;oBACrE,OAAO,EAAE,sBAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC7B,MAAM,EAAE,KAAK,CAAC,8BAA8B,CAAC,MAAM;iBACpD;aACF;SACF,CAAC,CAAC;QAEH,MAAM,OAAO,GACX,KAAK,CAAC,OAAO,IAAI;YACf,KAAK,EAAE,wBAAc,CAAC,YAAY,CAChC,0FAA0F,CAC3F;YACD,cAAc,EAAE,QAAQ;YACxB,0BAA0B,EAAE,UAAmB;SAChD,CAAC;QACJ,IAAI,0BAA0B,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,YAAY,OAAO,CAAC,cAAc,MAAM,QAAQ,MAAM,UAAU,OAAO,QAAQ,EAAE,CAAC;QACzI,IAAI,UAAY,EAAE,CAAC;YACjB,0BAA0B,GAAG,GAAG,0BAA0B,SAAS,UAAU,EAAE,CAAC;QAClF,CAAC;QACD,KAAK,CAAC,MAAM,CAAC,cAAc,CACzB,kBAAkB,CAAC,YAAa,EAChC,GAAG,0BAA0B,IAAI,CAClC,CAAC;QAEF,MAAM,aAAa,GAAG,IAAA,iBAAY,EAChC,IAAA,WAAI,EAAC,SAAS,EAAE,uBAAuB,CAAC,CACzC,CAAC,QAAQ,EAAE,CAAC;QACb,MAAM,aAAa,GAAG,IAAI,KAAK,CAAC,gBAAgB,CAAC,IAAI,EAAE,eAAe,EAAE;YACtE,SAAS,EAAE,IAAI,KAAK,CAAC,yBAAyB,CAC5C,IAAI,EACJ,qBAAqB,EACrB;gBACE,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,oDAAoD;gBACpD,kBAAkB;gBAClB,2DAA2D;gBAC3D,0EAA0E;gBAC1E,4EAA4E;gBAC5E,MAAM,EAAE,kBAAI,CAAC,SAAS,CACpB,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,WAAW,EAAE,GAAG,IAAI,CAAC,CACpD;gBACD,GAAG,EAAE,YAAY,CAAC,IAAI;gBACtB,OAAO,EAAE;oBACP,2BAA2B,aAAa,SAAS;oBACjD;wBACE,OAAO,CAAC,0BAA0B;4BAChC,CAAC,CAAC,6HAA6H;4BAC/H,CAAC,CAAC,SAAS;wBACb,0FAA0F;wBAC1F,yBAAyB;wBACzB,iBAAiB;wBACjB,oCAAoC,KAAK,CAAC,KAAK,CAAC,OAAO,QAAQ;wBAC/D,mBAAmB;wBACnB,qBAAqB;wBACrB,qCAAqC,KAAK,CAAC,MAA
M,CAAC,cAAc,CAAC,GAAG,0BAA0B,QAAQ,CAAC,EAAE;wBACzG,wCAAwC,KAAK,CAAC,MAAM,CAAC,cAAc,CAAC,GAAG,0BAA0B,WAAW,CAAC,EAAE;wBAC/G,0BAA0B;qBAC3B;yBACE,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;yBAClB,IAAI,CAAC,MAAM,CAAC;iBAChB;gBACD,WAAW,EAAE;oBACX,QAAQ,EAAE,KAAK,CAAC,KAAK,CAAC,OAAO;oBAC7B,SAAS,EAAE,QAAQ,CAAC,QAAQ,EAAE;oBAC9B,WAAW,EAAE,UAAU,CAAC,QAAQ,EAAE;oBAClC,SAAS,EAAE,QAAQ,CAAC,QAAQ,EAAE;oBAC9B,WAAW,EAAE,UAAU,EAAE,QAAQ,EAAE,IAAI,EAAE;oBACzC,eAAe,EAAE,KAAK,CAAC,MAAM,CAAC,cAAc,CAC1C,0BAA0B,CAC3B;oBACD,4BAA4B,EAC1B,OAAO,CAAC,0BAA0B,IAAI,EAAE;iBAC3C;aACF,CACF;SACF,CAAC,CAAC;QACH,IAAI,CAAC,uBAAuB,CAAC,aAAa,EAAE,YAAY,CAAC,CAAC;QAE1D,MAAM,iBAAiB,GAAG,IAAI,8BAAiB,CAAC,IAAI,EAAE,mBAAmB,EAAE;YACzE,IAAI,EAAE,iBAAI,CAAC,SAAS,CAAC,IAAA,WAAI,EAAC,SAAS,EAAE,2BAA2B,CAAC,CAAC;YAClE,OAAO,EAAE,eAAe;YACxB,OAAO,EAAE,oBAAO,CAAC,WAAW;YAC5B,IAAI,EAAE,sCAAsC;SAC7C,CAAC,CAAC;QACH,aAAa,CAAC,cAAc,CAAC,iBAAiB,EAAE,QAAQ,CAAC,CAAC;QAC1D,MAAM,qBAAqB,GAAG,IAAI,8BAAiB,CACjD,IAAI,EACJ,uBAAuB,EACvB;YACE,IAAI,EAAE,iBAAI,CAAC,SAAS,CAAC,IAAA,WAAI,EAAC,SAAS,EAAE,2BAA2B,CAAC,CAAC;YAClE,OAAO,EAAE,kBAAkB;YAC3B,OAAO,EAAE,oBAAO,CAAC,WAAW;YAC5B,IAAI,EAAE,sCAAsC;SAC7C,CACF,CAAC;QACF,eAAK,CAAC,cAAc,CAAC;YACnB,YAAY,EAAE,CAAC,GAAG,CAAC;YACnB,OAAO,EAAE,qBAAqB;YAC9B,OAAO,EAAE,CAAC,oBAAoB,CAAC;SAChC,CAAC,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,2BAAQ,CAAC,IAAI,EAAE,oBAAoB,EAAE;YACxD,cAAc,EAAE,iBAAiB;YACjC,iBAAiB,EAAE,qBAAqB;YACxC,aAAa,EAAE,sBAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;YAClC,YAAY,EAAE,sBAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;SAChC,CAAC,CAAC;QACH,MAAM,UAAU,GAAG,IAAI,4BAAc,CAAC,IAAI,EAAE,UAAU,EAAE;YACtD,YAAY,EAAE,QAAQ,CAAC,YAAY;YACnC,YAAY,EAAE,wBAAwB;YACtC,UAAU,EAAE;gBACV,gBAAgB,EAAE,aAAa,CAAC,gBAAgB;gBAChD,WAAW,EAAE,QAAQ,CAAC,WAAW;gBACjC,aAAa,EAAE,KAAK,CAAC,MAAM,CAAC,cAAc,CAAC,0BAA0B,CAAC;aACvE;SACF,CAAC,CAAC;QACH,IAAI,CAAC,qBAAqB,GAAG,UAAU,CAAC,YAAY,CAAC,eAAe,CAAC,CAAC;IACxE,CAAC;IAEO,uBAAuB,CAC7B,aAAqC,EACrC,YAAiC;QAKjC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC;YACzB,MAAM,EAAE,YAAY,CAAC,gBAAgB,CAAC,KAAK;SAC5C,CAAC,CAAC,GAAG,CACJ,CAAC,C
AAC,EAAE,KAAK,EAAE,EAAE,CACX,CAAC;YACC,QAAQ,EAAE,cAAc,KAAK,EAAE;YAC/B,aAAa,EAAE,cAAc,KAAK,EAAE;YACpC,WAAW,EAAE,CAAC,MAAM,EAAE,OAAO,CAAC;SAC/B,CAA6D,CACjE,CAAC;QACF,MAAM,gBAAgB,GAAG,aAAa,CAAC,IAAI;aACxC,YAAsC,CAAC;QAC1C,gBAAgB,CAAC,mBAAmB,CAClC,6CAA6C,EAC7C,OAAO,CACR,CAAC;IACJ,CAAC;IAEO,YAAY,CAAC,UAAsB,EAAE,cAA8B;QACzE,kBAAkB;QAClB,MAAM,gBAAgB,GAAG,EAAE,GAAG,CAAC,CAAC;QAChC,sDAAsD;QACtD,IAAI,MAAM,GAAG,kBAAI,CAAC,SAAS,CAAC,gBAAgB,GAAG,UAAU,CAAC,QAAQ,EAAE,CAAC,CAAC;QACtE,QAAQ,cAAc,CAAC,UAAU,EAAE,CAAC;YAClC,KAAK,UAAU,CAAC,EAAE;gBAChB,MAAM,GAAG,kBAAI,CAAC,SAAS,CAAC,MAAM,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAC;gBAClD,MAAM;QACV,CAAC;QACD,MAAM,gBAAgB,GAAG,kBAAI,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CACvB,MAAM,CAAC,WAAW,EAAE,GAAG,gBAAgB,CAAC,WAAW,EAAE,CACtD,CAAC;QAEF,MAAM,SAAS,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;QACnC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,IAAI,OAAO,IAAI,QAAQ,EAAE,CAAC;gBACxB,OAAO,QAAQ,CAAC;YAClB,CAAC;QACH,CAAC;QACD,MAAM,IAAI,KAAK,CACb,wEAAwE,CACzE,CAAC;IACJ,CAAC;IAEO,4BAA4B,CAAC,QAAgB;QACnD,MAAM,aAAa,GAAG;YACpB,2CAAmB,CAAC,YAAY;YAChC,2CAAmB,CAAC,aAAa;YACjC,2CAAmB,CAAC,aAAa;SAClC,CAAC;QACF,KAAK,MAAM,YAAY,IAAI,aAAa,EAAE,CAAC;YACzC,IAAI,QAAQ,IAAI,YAAY,CAAC,gBAAgB,CAAC,YAAY,EAAE,CAAC;gBAC3D,OAAO,YAAY,CAAC;YACtB,CAAC;QACH,CAAC;QACD,MAAM,IAAI,KAAK,CACb,wEAAwE,CACzE,CAAC;IACJ,CAAC;;AA1PH,wCA2PC","sourcesContent":["import { readFileSync } from \"fs\";\nimport { join } from \"path\";\nimport { CustomResource, Duration, Size, Tags } from \"aws-cdk-lib\";\nimport * as batch from \"aws-cdk-lib/aws-batch\";\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport { ContainerImage } from \"aws-cdk-lib/aws-ecs\";\nimport { Grant } from \"aws-cdk-lib/aws-iam\";\nimport { Code, Runtime, SingletonFunction } from \"aws-cdk-lib/aws-lambda\";\nimport { IBucket } from \"aws-cdk-lib/aws-s3\";\nimport { Provider } from \"aws-cdk-lib/custom-resources\";\nimport { Construct } from \"constructs\";\nimport { NeuronxInstanceType } 
from \"./neuronx-instance-type\";\nimport { NeuronOptimizedMachineImage } from \"./private/neuron-optimized-machine-image\";\n\n/**\n * Compile runtime.\n */\nexport interface CompileRuntime {\n  /**\n   * An image of the container where the compile job is executed.\n   */\n  readonly image: ContainerImage;\n  /**\n   * Neuronx version included in container image.\n   */\n  readonly neuronxVersion: string;\n}\n\n/**\n * Quant data type.\n */\nexport enum QuantDtype {\n  /**\n   * int8 weight storage.\n   */\n  S8 = \"s8\",\n}\n\n/**\n * Optimization level.\n */\nexport enum OptLevel {\n  /**\n   * enables the core performance optimizations in the compiler, while also minimizing compile time.\n   */\n  MINIMIZING_COMPILE_TIME = 1,\n  /**\n   * provides the best balance between model performance and compile time.\n   */\n  BEST_BALANCE = 2,\n  /**\n   * may provide additional model execution performance but may incur longer compile times and higher host memory usage during model compilation.\n   */\n  MODEL_EXECUTION_PERFORMANCE = 3,\n}\n\n/**\n * Compile options.\n */\nexport interface CompileOptions {\n  /**\n   * @default - calc from parameters and quantDtype\n   */\n  readonly tpDegree?: number;\n  /**\n   * @default - No quant\n   */\n  readonly quantDtype?: QuantDtype;\n  /**\n   * @default 4092\n   */\n  readonly nPositions?: number;\n  /**\n   * @default OptLevel.BEST_BALANCE\n   */\n  readonly optLevel?: OptLevel;\n}\n\n/**\n * Represents the amount of parameters.\n */\nexport class Parameters {\n  /**\n   * Create a Parameters representing an amount bilion.\n   * @param parameters number of parameters bilionX\n   * @returns parameters\n   */\n  static billion(parameters: number) {\n    return new Parameters(parameters);\n  }\n  private constructor(private readonly billion: number) {}\n  /**\n   * Return this number of parameters as bilion.\n   * @returns This number of parameters as bilion.\n   */\n  toBilion() {\n    return this.billion;\n  }\n}\n\n/**\n * 
Compile target model basic infromation\n */\nexport interface ModelOptions {\n  readonly parameters: Parameters;\n}\n/**\n * Compile target model.\n */\nexport class Model {\n  /**\n   * model informations at HuggingFace\n   * @param modelId model id on the HuggingFace\n   * @param options model basic infromation\n   * @returns model instance\n   */\n  static fromHuggingFace(modelId: string, options: ModelOptions) {\n    return new Model(modelId, options);\n  }\n  private constructor(\n    readonly modelId: string,\n    readonly options: ModelOptions,\n  ) {}\n}\n/**\n * Props of NeuronxCompile.\n */\nexport interface NeuronxCompileProps {\n  /**\n   * VPC in which this will launch compile worker instance.\n   */\n  readonly vpc: ec2.IVpc;\n  /**\n   * The instance type of compile worker instance.\n   */\n  readonly instanceType?: NeuronxInstanceType;\n  /**\n   * The bucket to upload compiled artifacts.\n   */\n  readonly bucket: IBucket;\n  /**\n   * The model to be compiled.\n   */\n  readonly model: Model;\n  /**\n   * The root volume of worker instance.\n   * @default - N bilion parameters * 5GiB EBS\n   */\n  readonly volumeSize?: Size;\n  /**\n   * Compile runtime.\n   * @default { neuronxSdkVersion: \"2.19.0\", image: ContainerImage.fromRegistry(\"public.ecr.aws/neuron/pytorch-training-neuronx:2.1.2-neuronx-py310-sdk2.19.0-ubuntu20.04\")}\n   */\n  readonly runtime?: CompileRuntime;\n  /**\n   * Neuronx compile options.\n   * @default - Each properties are set default.\n   */\n  readonly compileOptions?: CompileOptions;\n  /**\n   * Whether or not to use spot instances. 
Spot instances are less expensive EC2 instances that can be reclaimed by EC2 at any time; your job will be given two minutes of notice before reclamation.\n   *\n   * @default false\n   */\n  readonly spot?: boolean;\n  /**\n   * The VPC Subnets this Compute Environment will launch instances in.\n   *\n   * @default - new subnets will be created\n   */\n  readonly vpcSubnets?: ec2.SubnetSelection;\n}\n\n/**\n * Neuronx compile construct. Compile the model to work with Inferentia2 and Trainium1 and upload it to an S3 bucket.\n */\nexport class NeuronxCompile extends Construct {\n  /**\n   * S3 URL that compiled artifact uploaded.\n   */\n  readonly compiledArtifactS3Url: string;\n  constructor(scope: Construct, id: string, props: NeuronxCompileProps) {\n    super(scope, id);\n\n    const nPositions = props.compileOptions?.nPositions ?? 4092;\n    const quantDtype = props.compileOptions?.quantDtype;\n    const optLevel = props.compileOptions?.optLevel ?? OptLevel.BEST_BALANCE;\n    const tpDegree =\n      props.compileOptions?.tpDegree ??\n      this.calcTpDegree(props.model.options.parameters, {\n        nPositions,\n        quantDtype,\n      });\n    const instanceType =\n      props.instanceType ?? 
this.selectInstanceTypeByTpDegree(tpDegree);\n    const launchTemplate = new ec2.LaunchTemplate(this, \"LaunchTemplate\", {\n      blockDevices: [\n        {\n          deviceName: \"/dev/xvda\",\n          volume: ec2.BlockDeviceVolume.ebs(\n            props.volumeSize?.toGibibytes() ??\n              props.model.options.parameters.toBilion() * 5,\n          ),\n        },\n      ],\n    });\n    const computeEnvironment = new batch.ManagedEc2EcsComputeEnvironment(\n      this,\n      \"ComputeEnvironment\",\n      {\n        vpc: props.vpc,\n        vpcSubnets: props.vpcSubnets,\n        instanceTypes: [instanceType.instanceType],\n        useOptimalInstanceClasses: false,\n        images: [\n          {\n            image: new NeuronOptimizedMachineImage(this, \"MachinImage\"),\n            // @ts-ignore\n            imageType: \"ECS_AL2023\",\n          },\n        ],\n        launchTemplate,\n        spot: props.spot,\n      },\n    );\n    (\n      computeEnvironment.node.defaultChild as batch.CfnComputeEnvironment\n    ).addPropertyOverride(\n      \"ComputeResources.LaunchTemplate.Version\",\n      launchTemplate.latestVersionNumber,\n    );\n\n    Tags.of(computeEnvironment).add(\"Name\", \"neuronx-compile-worker\");\n    const jobQueue = new batch.JobQueue(this, \"JobQueue\", {\n      computeEnvironments: [\n        {\n          computeEnvironment,\n          order: 1,\n        },\n      ],\n      jobStateTimeLimitActions: [\n        {\n          state: batch.JobStateTimeLimitActionsState.RUNNABLE,\n          reason: batch.JobStateTimeLimitActionsReason.JOB_RESOURCE_REQUIREMENT,\n          maxTime: Duration.minutes(10),\n          action: batch.JobStateTimeLimitActionsAction.CANCEL,\n        },\n      ],\n    });\n\n    const runtime: CompileRuntime & { neuronxTransformersVersion?: string } =\n      props.runtime ?? 
{\n        image: ContainerImage.fromRegistry(\n          \"public.ecr.aws/neuron/pytorch-training-neuronx:2.1.2-neuronx-py310-sdk2.19.0-ubuntu20.04\",\n        ),\n        neuronxVersion: \"2.19.0\",\n        neuronxTransformersVersion: \"0.11.351\" as const,\n      };\n    let compiledArtifactPathPrefix = `${props.model.modelId}/neuronx-${runtime.neuronxVersion}/tp${tpDegree}-np${nPositions}-opt${optLevel}`;\n    if (quantDtype!!) {\n      compiledArtifactPathPrefix = `${compiledArtifactPathPrefix}-quant${quantDtype}`;\n    }\n    props.bucket.grantReadWrite(\n      computeEnvironment.instanceRole!,\n      `${compiledArtifactPathPrefix}/*`,\n    );\n\n    const compileScript = readFileSync(\n      join(__dirname, \"../scripts/compile.py\"),\n    ).toString();\n    const jobDefinition = new batch.EcsJobDefinition(this, \"JobDefinition\", {\n      container: new batch.EcsEc2ContainerDefinition(\n        this,\n        \"ContainerDefinition\",\n        {\n          image: runtime.image,\n          // The fllowing command was executed on inf2.8xlarge\n          // sh-5.2$ free -b\n          // \t\t\ttotal\t\t\t\t\tused\t\t\tfree\t\t\t\t\tshared\tbuff/cache\tavailable\n          // Mem:\t132265766912\t866320384\t130341785600\t667648\t1057660928\t130529148928\n          // https://docs.aws.amazon.com/batch/latest/userguide/memory-management.html\n          memory: Size.mebibytes(\n            Math.ceil(instanceType.memory.toMebibytes() * 0.95),\n          ),\n          cpu: instanceType.vCpu,\n          command: [\n            `cat <<EOF > compile.py\\n${compileScript}\\nEOF\\n`,\n            [\n              runtime.neuronxTransformersVersion\n                ? 
\"pip install -U --extra-index-url https://pip.repos.neuron.amazonaws.com transformers-neuronx==$NEURONX_TRANSFORMERS_VERSION\"\n                : undefined,\n              \"curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash\",\n              \"apt-get install git-lfs\",\n              \"git lfs install\",\n              `git clone https://huggingface.co/${props.model.modelId} model`,\n              \"rm -rf model/.git\",\n              \"python ./compile.py\",\n              `aws s3 sync --no-progress ./model ${props.bucket.s3UrlForObject(`${compiledArtifactPathPrefix}/model`)}`,\n              `aws s3 sync --no-progress ./compiled ${props.bucket.s3UrlForObject(`${compiledArtifactPathPrefix}/compiled`)}`,\n              \"echo 'compile completed'\",\n            ]\n              .filter((v) => !!v)\n              .join(\" && \"),\n          ],\n          environment: {\n            MODEL_ID: props.model.modelId,\n            TP_DEGREE: tpDegree.toString(),\n            N_POSITIONS: nPositions.toString(),\n            OPT_LEVEL: optLevel.toString(),\n            QUANT_DTYPE: quantDtype?.toString() ?? \"\",\n            ARTIFACT_S3_URL: props.bucket.s3UrlForObject(\n              compiledArtifactPathPrefix,\n            ),\n            NEURONX_TRANSFORMERS_VERSION:\n              runtime.neuronxTransformersVersion ?? 
\"\",\n          },\n        },\n      ),\n    });\n    this.connectAcceleratorChips(jobDefinition, instanceType);\n\n    const jobSubmitFunction = new SingletonFunction(this, \"JobSubmitFunction\", {\n      code: Code.fromAsset(join(__dirname, \"private/await-compile-job\")),\n      handler: \"index.onEvent\",\n      runtime: Runtime.NODEJS_20_X,\n      uuid: \"1361f469-5c92-4c46-9e11-5d1dbf925bac\",\n    });\n    jobDefinition.grantSubmitJob(jobSubmitFunction, jobQueue);\n    const jobMonitoringFunction = new SingletonFunction(\n      this,\n      \"JobMonitoringFunction\",\n      {\n        code: Code.fromAsset(join(__dirname, \"private/await-compile-job\")),\n        handler: \"index.isComplete\",\n        runtime: Runtime.NODEJS_20_X,\n        uuid: \"df16dba8-5f77-480c-a6ad-cfdf74c3de62\",\n      },\n    );\n    Grant.addToPrincipal({\n      resourceArns: [\"*\"],\n      grantee: jobMonitoringFunction,\n      actions: [\"batch:DescribeJobs\"],\n    });\n    const provider = new Provider(this, \"CompileJobProvider\", {\n      onEventHandler: jobSubmitFunction,\n      isCompleteHandler: jobMonitoringFunction,\n      queryInterval: Duration.minutes(1),\n      totalTimeout: Duration.hours(1),\n    });\n    const compileJob = new CustomResource(this, \"Resource\", {\n      serviceToken: provider.serviceToken,\n      resourceType: \"Custom::NeuronxCompile\",\n      properties: {\n        jobDefinitionArn: jobDefinition.jobDefinitionArn,\n        jobQueueArn: jobQueue.jobQueueArn,\n        artifactS3Url: props.bucket.s3UrlForObject(compiledArtifactPathPrefix),\n      },\n    });\n    this.compiledArtifactS3Url = compileJob.getAttString(\"ArtifactS3Url\");\n  }\n\n  private connectAcceleratorChips(\n    jobDefinition: batch.EcsJobDefinition,\n    instanceType: NeuronxInstanceType,\n  ) {\n    type PascalCase<T extends object> = {\n      [P in keyof T as P extends string ? 
Capitalize<P> : never]: T[P];\n    };\n    const devices = Array.from({\n      length: instanceType.acceleratorChips.chips,\n    }).map(\n      (_, index) =>\n        ({\n          HostPath: `/dev/neuron${index}`,\n          ContainerPath: `/dev/neuron${index}`,\n          Permissions: [\"read\", \"write\"],\n        }) satisfies PascalCase<batch.CfnJobDefinition.DeviceProperty>,\n    );\n    const cfnJobDefinition = jobDefinition.node\n      .defaultChild as batch.CfnJobDefinition;\n    cfnJobDefinition.addPropertyOverride(\n      \"ContainerProperties.LinuxParameters.Devices\",\n      devices,\n    );\n  }\n\n  private calcTpDegree(parameters: Parameters, compileOptions: CompileOptions) {\n    // case of float16\n    const bytesPerParamete = 16 / 8;\n    // memory = bytes per parameter * number of parameters\n    let memory = Size.gibibytes(bytesPerParamete * parameters.toBilion());\n    switch (compileOptions.quantDtype) {\n      case QuantDtype.S8:\n        memory = Size.gibibytes(memory.toGibibytes() / 2);\n        break;\n    }\n    const neronxCoreMemory = Size.gibibytes(16);\n    const minimum = Math.ceil(\n      memory.toGibibytes() / neronxCoreMemory.toGibibytes(),\n    );\n\n    const tpDegrees = [1, 2, 4, 8, 24];\n    for (const tpDegree of tpDegrees) {\n      if (minimum <= tpDegree) {\n        return tpDegree;\n      }\n    }\n    throw new Error(\n      \"This model is too large, I can not support this model current version.\",\n    );\n  }\n\n  private selectInstanceTypeByTpDegree(tpDegree: number) {\n    const instanceTypes = [\n      NeuronxInstanceType.INF2_8XLARGE,\n      NeuronxInstanceType.INF2_24XLARGE,\n      NeuronxInstanceType.INF2_48XLARGE,\n    ];\n    for (const instanceType of instanceTypes) {\n      if (tpDegree <= instanceType.acceleratorChips.neuronxCores) {\n        return instanceType;\n      }\n    }\n    throw new Error(\n      \"This model is too large, I can not support this model current version.\",\n    );\n  }\n}\n"]}
|
|
171
|
+
_a = JSII_RTTI_SYMBOL_1;
|
|
172
|
+
NeuronxCompile[_a] = { fqn: "aws-cdk-neuronx-patterns.NeuronxCompile", version: "0.0.4" };
|
|
173
|
+
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"neuronx-compile.js","sourceRoot":"","sources":["../src/neuronx-compile.ts"],"names":[],"mappings":";;;;;AAAA,+BAA4B;AAC5B,6CAAmE;AACnE,+CAA+C;AAC/C,2CAA2C;AAC3C,iDAAqD;AACrD,iDAA4C;AAC5C,uDAA0E;AAE1E,mEAAwD;AACxD,2CAAuC;AACvC,mCAMiB;AACjB,mEAA8D;AAC9D,6FAAuF;AACvF,yCAA8C;AAgE9C;;GAEG;AACH,MAAa,cAAe,SAAQ,sBAAS;IAe3C,YAAY,KAAgB,EAAE,EAAU,EAAE,KAA0B;QAClE,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAEjB,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC;QACjD,IAAI,CAAC,wBAAwB,GAAG,KAAK,CAAC,MAAM,CAAC;QAC7C,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC,cAAc,EAAE,UAAU,IAAI,IAAI,CAAC;QAC3D,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC,cAAc,EAAE,UAAU,CAAC;QACnD,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC,cAAc,EAAE,QAAQ,IAAI,gBAAQ,CAAC,YAAY,CAAC;QACxE,IAAI,CAAC,QAAQ;YACX,KAAK,CAAC,cAAc,EAAE,QAAQ;gBAC9B,IAAA,mBAAY,EAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE;oBAC3C,UAAU,EAAE,IAAI,CAAC,UAAU;oBAC3B,UAAU,EAAE,IAAI,CAAC,UAAU;iBAC5B,CAAC,CAAC;QACL,MAAM,YAAY,GAChB,KAAK,CAAC,YAAY,IAAI,IAAI,CAAC,4BAA4B,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACzE,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,cAAc,CAAC,IAAI,EAAE,gBAAgB,EAAE;YACpE,YAAY,EAAE;gBACZ;oBACE,UAAU,EAAE,WAAW;oBACvB,MAAM,EAAE,GAAG,CAAC,iBAAiB,CAAC,GAAG,CAC/B,KAAK,CAAC,UAAU,EAAE,WAAW,EAAE;wBAC7B,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC,CAChD;iBACF;aACF;SACF,CAAC,CAAC;QACH,MAAM,kBAAkB,GAAG,IAAI,KAAK,CAAC,+BAA+B,CAClE,IAAI,EACJ,oBAAoB,EACpB;YACE,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,UAAU,EAAE,KAAK,CAAC,UAAU;YAC5B,aAAa,EAAE,CAAC,YAAY,CAAC,YAAY,CAAC;YAC1C,yBAAyB,EAAE,KAAK;YAChC,MAAM,EAAE;gBACN;oBACE,KAAK,EAAE,IAAI,4DAA2B,CAAC,IAAI,EAAE,aAAa,CAAC;oBAC3D,aAAa;oBACb,SAAS,EAAE,YAAY;iBACxB;aACF;YACD,cAAc;YACd,IAAI,EAAE,KAAK,CAAC,IAAI;SACjB,CACF,CAAC;QAEA,kBAAkB,CAAC,IAAI,CAAC,YACzB,CAAC,mBAAmB,CACnB,yCAAyC,EACzC,cAAc,CAAC,mBAAmB,CACnC,CAAC;QAEF,kBAAI,CAAC,EAAE,CAAC,kBAAkB,CAAC,CAAC,GAAG,CAAC,MAAM,EAAE,wBAAwB,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,UAAU,EAAE;YACpD,mBAAmB,EAAE;gBACnB;oBACE,kBAAkB;oBAC
lB,KAAK,EAAE,CAAC;iBACT;aACF;YACD,wBAAwB,EAAE;gBACxB;oBACE,KAAK,EAAE,KAAK,CAAC,6BAA6B,CAAC,QAAQ;oBACnD,MAAM,EAAE,KAAK,CAAC,8BAA8B,CAAC,wBAAwB;oBACrE,OAAO,EAAE,sBAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC7B,MAAM,EAAE,KAAK,CAAC,8BAA8B,CAAC,MAAM;iBACpD;aACF;SACF,CAAC,CAAC;QAEH,MAAM,OAAO,GAAmB,KAAK,CAAC,OAAO,IAAI;YAC/C,KAAK,EAAE,wBAAc,CAAC,SAAS,CAAC,IAAA,WAAI,EAAC,SAAS,EAAE,oBAAoB,CAAC,CAAC;YACtE,cAAc,EAAE,QAAQ;SACzB,CAAC;QACF,IAAI,0BAA0B,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,YAAY,OAAO,CAAC,cAAc,MAAM,IAAI,CAAC,QAAQ,MAAM,IAAI,CAAC,UAAU,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC;QACxJ,IAAI,IAAI,CAAC,UAAY,EAAE,CAAC;YACtB,0BAA0B,GAAG,GAAG,0BAA0B,SAAS,IAAI,CAAC,UAAU,EAAE,CAAC;QACvF,CAAC;QACD,KAAK,CAAC,MAAM,CAAC,cAAc,CACzB,kBAAkB,CAAC,YAAa,EAChC,GAAG,0BAA0B,IAAI,CAClC,CAAC;QACF,MAAM,eAAe,GAAG,IAAI,KAAK,CAAC,eAAe,CAAC,IAAI,EAAE,iBAAiB,CAAC,CAAC;QAC3E,eAAe,CAAC,UAAU,CACxB,GAAG,KAAK,CAAC,IAAI,CAAC;YACZ,MAAM,EAAE,YAAY,CAAC,gBAAgB,CAAC,KAAK;SAC5C,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;YACpB,QAAQ,EAAE,cAAc,KAAK,EAAE;YAC/B,aAAa,EAAE,cAAc,KAAK,EAAE;YACpC,WAAW,EAAE;gBACX,KAAK,CAAC,gBAAgB,CAAC,IAAI;gBAC3B,KAAK,CAAC,gBAAgB,CAAC,KAAK;aAC7B;SACF,CAAC,CAAC,CACJ,CAAC;QACF,MAAM,aAAa,GAAG,IAAI,KAAK,CAAC,gBAAgB,CAAC,IAAI,EAAE,eAAe,EAAE;YACtE,SAAS,EAAE,IAAI,KAAK,CAAC,yBAAyB,CAC5C,IAAI,EACJ,qBAAqB,EACrB;gBACE,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,oDAAoD;gBACpD,kBAAkB;gBAClB,2DAA2D;gBAC3D,0EAA0E;gBAC1E,4EAA4E;gBAC5E,MAAM,EAAE,kBAAI,CAAC,SAAS,CACpB,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,WAAW,EAAE,GAAG,IAAI,CAAC,CACpD;gBACD,GAAG,EAAE,YAAY,CAAC,IAAI;gBACtB,WAAW,EAAE;oBACX,QAAQ,EAAE,KAAK,CAAC,KAAK,CAAC,OAAO;oBAC7B,SAAS,EAAE,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE;oBACnC,WAAW,EAAE,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE;oBACvC,SAAS,EAAE,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE;oBACnC,WAAW,EAAE,IAAI,CAAC,UAAU,EAAE,QAAQ,EAAE,IAAI,EAAE;oBAC9C,eAAe,EAAE,KAAK,CAAC,MAAM,CAAC,cAAc,CAC1C,0BAA0B,CAC3B;iBACF;gBACD,eAAe;aAChB,CACF;SACF,CAAC,CAAC;QAEH,MAAM,iBAAiB,GAAG,IAAI,8BAAiB,CAAC,IAAI,EAAE,mBAAmB,EAAE;YACzE,IAAI,EAAE,iBAAI,CAAC,SAAS,CAAC,IAAA,WAAI
,EAAC,SAAS,EAAE,2BAA2B,CAAC,CAAC;YAClE,OAAO,EAAE,eAAe;YACxB,OAAO,EAAE,oBAAO,CAAC,WAAW;YAC5B,IAAI,EAAE,sCAAsC;SAC7C,CAAC,CAAC;QACH,aAAa,CAAC,cAAc,CAAC,iBAAiB,EAAE,QAAQ,CAAC,CAAC;QAC1D,MAAM,qBAAqB,GAAG,IAAI,8BAAiB,CACjD,IAAI,EACJ,uBAAuB,EACvB;YACE,IAAI,EAAE,iBAAI,CAAC,SAAS,CAAC,IAAA,WAAI,EAAC,SAAS,EAAE,2BAA2B,CAAC,CAAC;YAClE,OAAO,EAAE,kBAAkB;YAC3B,OAAO,EAAE,oBAAO,CAAC,WAAW;YAC5B,IAAI,EAAE,sCAAsC;SAC7C,CACF,CAAC;QACF,eAAK,CAAC,cAAc,CAAC;YACnB,YAAY,EAAE,CAAC,GAAG,CAAC;YACnB,OAAO,EAAE,qBAAqB;YAC9B,OAAO,EAAE,CAAC,oBAAoB,CAAC;SAChC,CAAC,CAAC;QACH,MAAM,QAAQ,GAAG,IAAI,2BAAQ,CAAC,IAAI,EAAE,oBAAoB,EAAE;YACxD,cAAc,EAAE,iBAAiB;YACjC,iBAAiB,EAAE,qBAAqB;YACxC,aAAa,EAAE,sBAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;YAClC,YAAY,EAAE,sBAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;SAChC,CAAC,CAAC;QACH,MAAM,UAAU,GAAG,IAAI,4BAAc,CAAC,IAAI,EAAE,UAAU,EAAE;YACtD,YAAY,EAAE,QAAQ,CAAC,YAAY;YACnC,YAAY,EAAE,wBAAwB;YACtC,UAAU,EAAE;gBACV,gBAAgB,EAAE,aAAa,CAAC,gBAAgB;gBAChD,WAAW,EAAE,QAAQ,CAAC,WAAW;gBACjC,gBAAgB,EAAE,0BAA0B;aAC7C;SACF,CAAC,CAAC;QACH,IAAI,CAAC,wBAAwB,GAAG,UAAU,CAAC,YAAY,CAAC,kBAAkB,CAAC,CAAC;QAC5E,IAAI,CAAC,qBAAqB,GAAG,KAAK,CAAC,MAAM,CAAC,cAAc,CACtD,IAAI,CAAC,wBAAwB,CAC9B,CAAC;IACJ,CAAC;IAEO,4BAA4B,CAAC,QAAgB;QACnD,MAAM,aAAa,GAAG;YACpB,2CAAmB,CAAC,YAAY;YAChC,2CAAmB,CAAC,aAAa;YACjC,2CAAmB,CAAC,aAAa;SAClC,CAAC;QACF,KAAK,MAAM,YAAY,IAAI,aAAa,EAAE,CAAC;YACzC,IAAI,QAAQ,IAAI,YAAY,CAAC,gBAAgB,CAAC,YAAY,EAAE,CAAC;gBAC3D,OAAO,YAAY,CAAC;YACtB,CAAC;QACH,CAAC;QACD,MAAM,IAAI,KAAK,CACb,wEAAwE,CACzE,CAAC;IACJ,CAAC;;AAtMH,wCAuMC","sourcesContent":["import { join } from \"path\";\nimport { CustomResource, Duration, Size, Tags } from \"aws-cdk-lib\";\nimport * as batch from \"aws-cdk-lib/aws-batch\";\nimport * as ec2 from \"aws-cdk-lib/aws-ec2\";\nimport { ContainerImage } from \"aws-cdk-lib/aws-ecs\";\nimport { Grant } from \"aws-cdk-lib/aws-iam\";\nimport { Code, Runtime, SingletonFunction } from \"aws-cdk-lib/aws-lambda\";\nimport { IBucket } from \"aws-cdk-lib/aws-s3\";\nimport { Provider } from 
\"aws-cdk-lib/custom-resources\";\nimport { Construct } from \"constructs\";\nimport {\n  CompileOptions,\n  Model,\n  OptLevel,\n  Parameters,\n  QuantDtype,\n} from \"./model\";\nimport { NeuronxInstanceType } from \"./neuronx-instance-type\";\nimport { NeuronOptimizedMachineImage } from \"./private/neuron-optimized-machine-image\";\nimport { calcTpDegree } from \"./private/util\";\n\n/**\n * Compile runtime.\n */\nexport interface CompileRuntime {\n  /**\n   * An image of the container where the compile job is executed.\n   */\n  readonly image: ContainerImage;\n  /**\n   * Neuronx version included in container image.\n   */\n  readonly neuronxVersion: string;\n}\n/**\n * Props of NeuronxCompile.\n */\nexport interface NeuronxCompileProps {\n  /**\n   * VPC in which this will launch compile worker instance.\n   */\n  readonly vpc: ec2.IVpc;\n  /**\n   * The bucket to upload compiled artifacts.\n   */\n  readonly bucket: IBucket;\n  /**\n   * The model to be compiled.\n   */\n  readonly model: Model;\n  /**\n   * The instance type of compile worker instance.\n   */\n  readonly instanceType?: NeuronxInstanceType;\n  /**\n   * The root volume of worker instance.\n   * @default - N bilion parameters * 5GiB EBS\n   */\n  readonly volumeSize?: Size;\n  /**\n   * Compile runtime.\n   * @default { neuronxSdkVersion: \"2.19.0\", image: ContainerImage.fromRegistry(\"public.ecr.aws/neuron/pytorch-training-neuronx:2.1.2-neuronx-py310-sdk2.19.0-ubuntu20.04\")}\n   */\n  readonly runtime?: CompileRuntime;\n  /**\n   * Neuronx compile options.\n   * @default - Each properties are set default.\n   */\n  readonly compileOptions?: CompileOptions;\n  /**\n   * Whether or not to use spot instances. 
Spot instances are less expensive EC2 instances that can be reclaimed by EC2 at any time; your job will be given two minutes of notice before reclamation.\n   *\n   * @default false\n   */\n  readonly spot?: boolean;\n  /**\n   * The VPC Subnets this Compute Environment will launch instances in.\n   *\n   * @default - new subnets will be created\n   */\n  readonly vpcSubnets?: ec2.SubnetSelection;\n}\n\n/**\n * Neuronx compile construct. Compile the model to work with Inferentia2 and Trainium1 and upload it to an S3 bucket.\n */\nexport class NeuronxCompile extends Construct {\n  readonly compiledArtifactS3Bucket: IBucket;\n  /**\n   * S3 URL that compiled artifact uploaded.\n   */\n  readonly compiledArtifactS3Url: string;\n  /**\n   * S3 Prefix that compiled artifact uploaded.\n   */\n  readonly compiledArtifactS3Prefix: string;\n  readonly tpDegree: number;\n  readonly quantDtype?: QuantDtype;\n  readonly nPositions: number;\n  readonly optLevel: OptLevel;\n  readonly parameters: Parameters;\n  constructor(scope: Construct, id: string, props: NeuronxCompileProps) {\n    super(scope, id);\n\n    this.parameters = props.model.options.parameters;\n    this.compiledArtifactS3Bucket = props.bucket;\n    this.nPositions = props.compileOptions?.nPositions ?? 4096;\n    this.quantDtype = props.compileOptions?.quantDtype;\n    this.optLevel = props.compileOptions?.optLevel ?? OptLevel.BEST_BALANCE;\n    this.tpDegree =\n      props.compileOptions?.tpDegree ??\n      calcTpDegree(props.model.options.parameters, {\n        nPositions: this.nPositions,\n        quantDtype: this.quantDtype,\n      });\n    const instanceType =\n      props.instanceType ?? 
this.selectInstanceTypeByTpDegree(this.tpDegree);\n    const launchTemplate = new ec2.LaunchTemplate(this, \"LaunchTemplate\", {\n      blockDevices: [\n        {\n          deviceName: \"/dev/xvda\",\n          volume: ec2.BlockDeviceVolume.ebs(\n            props.volumeSize?.toGibibytes() ??\n              props.model.options.parameters.toBilion() * 5,\n          ),\n        },\n      ],\n    });\n    const computeEnvironment = new batch.ManagedEc2EcsComputeEnvironment(\n      this,\n      \"ComputeEnvironment\",\n      {\n        vpc: props.vpc,\n        vpcSubnets: props.vpcSubnets,\n        instanceTypes: [instanceType.instanceType],\n        useOptimalInstanceClasses: false,\n        images: [\n          {\n            image: new NeuronOptimizedMachineImage(this, \"MachinImage\"),\n            // @ts-ignore\n            imageType: \"ECS_AL2023\",\n          },\n        ],\n        launchTemplate,\n        spot: props.spot,\n      },\n    );\n    (\n      computeEnvironment.node.defaultChild as batch.CfnComputeEnvironment\n    ).addPropertyOverride(\n      \"ComputeResources.LaunchTemplate.Version\",\n      launchTemplate.latestVersionNumber,\n    );\n\n    Tags.of(computeEnvironment).add(\"Name\", \"neuronx-compile-worker\");\n    const jobQueue = new batch.JobQueue(this, \"JobQueue\", {\n      computeEnvironments: [\n        {\n          computeEnvironment,\n          order: 1,\n        },\n      ],\n      jobStateTimeLimitActions: [\n        {\n          state: batch.JobStateTimeLimitActionsState.RUNNABLE,\n          reason: batch.JobStateTimeLimitActionsReason.JOB_RESOURCE_REQUIREMENT,\n          maxTime: Duration.minutes(10),\n          action: batch.JobStateTimeLimitActionsAction.CANCEL,\n        },\n      ],\n    });\n\n    const runtime: CompileRuntime = props.runtime ?? 
{\n      image: ContainerImage.fromAsset(join(__dirname, \"../scripts/compile\")),\n      neuronxVersion: \"2.19.1\",\n    };\n    let compiledArtifactPathPrefix = `${props.model.modelId}/neuronx-${runtime.neuronxVersion}/tp${this.tpDegree}-np${this.nPositions}-opt${this.optLevel}`;\n    if (this.quantDtype!!) {\n      compiledArtifactPathPrefix = `${compiledArtifactPathPrefix}-quant${this.quantDtype}`;\n    }\n    props.bucket.grantReadWrite(\n      computeEnvironment.instanceRole!,\n      `${compiledArtifactPathPrefix}/*`,\n    );\n    const linuxParameters = new batch.LinuxParameters(this, \"LinuxParameters\");\n    linuxParameters.addDevices(\n      ...Array.from({\n        length: instanceType.acceleratorChips.chips,\n      }).map((_, index) => ({\n        hostPath: `/dev/neuron${index}`,\n        containerPath: `/dev/neuron${index}`,\n        permissions: [\n          batch.DevicePermission.READ,\n          batch.DevicePermission.WRITE,\n        ],\n      })),\n    );\n    const jobDefinition = new batch.EcsJobDefinition(this, \"JobDefinition\", {\n      container: new batch.EcsEc2ContainerDefinition(\n        this,\n        \"ContainerDefinition\",\n        {\n          image: runtime.image,\n          // The fllowing command was executed on inf2.8xlarge\n          // sh-5.2$ free -b\n          // \t\t\ttotal\t\t\t\t\tused\t\t\tfree\t\t\t\t\tshared\tbuff/cache\tavailable\n          // Mem:\t132265766912\t866320384\t130341785600\t667648\t1057660928\t130529148928\n          // https://docs.aws.amazon.com/batch/latest/userguide/memory-management.html\n          memory: Size.mebibytes(\n            Math.ceil(instanceType.memory.toMebibytes() * 0.95),\n          ),\n          cpu: instanceType.vCpu,\n          environment: {\n            MODEL_ID: props.model.modelId,\n            TP_DEGREE: this.tpDegree.toString(),\n            N_POSITIONS: this.nPositions.toString(),\n            OPT_LEVEL: this.optLevel.toString(),\n            QUANT_DTYPE: 
this.quantDtype?.toString() ?? \"\",\n            ARTIFACT_S3_URL: props.bucket.s3UrlForObject(\n              compiledArtifactPathPrefix,\n            ),\n          },\n          linuxParameters,\n        },\n      ),\n    });\n\n    const jobSubmitFunction = new SingletonFunction(this, \"JobSubmitFunction\", {\n      code: Code.fromAsset(join(__dirname, \"private/await-compile-job\")),\n      handler: \"index.onEvent\",\n      runtime: Runtime.NODEJS_20_X,\n      uuid: \"1361f469-5c92-4c46-9e11-5d1dbf925bac\",\n    });\n    jobDefinition.grantSubmitJob(jobSubmitFunction, jobQueue);\n    const jobMonitoringFunction = new SingletonFunction(\n      this,\n      \"JobMonitoringFunction\",\n      {\n        code: Code.fromAsset(join(__dirname, \"private/await-compile-job\")),\n        handler: \"index.isComplete\",\n        runtime: Runtime.NODEJS_20_X,\n        uuid: \"df16dba8-5f77-480c-a6ad-cfdf74c3de62\",\n      },\n    );\n    Grant.addToPrincipal({\n      resourceArns: [\"*\"],\n      grantee: jobMonitoringFunction,\n      actions: [\"batch:DescribeJobs\"],\n    });\n    const provider = new Provider(this, \"CompileJobProvider\", {\n      onEventHandler: jobSubmitFunction,\n      isCompleteHandler: jobMonitoringFunction,\n      queryInterval: Duration.minutes(1),\n      totalTimeout: Duration.hours(1),\n    });\n    const compileJob = new CustomResource(this, \"Resource\", {\n      serviceToken: provider.serviceToken,\n      resourceType: \"Custom::NeuronxCompile\",\n      properties: {\n        jobDefinitionArn: jobDefinition.jobDefinitionArn,\n        jobQueueArn: jobQueue.jobQueueArn,\n        artifactS3Prefix: compiledArtifactPathPrefix,\n      },\n    });\n    this.compiledArtifactS3Prefix = compileJob.getAttString(\"ArtifactS3Prefix\");\n    this.compiledArtifactS3Url = props.bucket.s3UrlForObject(\n      this.compiledArtifactS3Prefix,\n    );\n  }\n\n  private selectInstanceTypeByTpDegree(tpDegree: number) {\n    const instanceTypes = [\n      
NeuronxInstanceType.INF2_8XLARGE,\n      NeuronxInstanceType.INF2_24XLARGE,\n      NeuronxInstanceType.INF2_48XLARGE,\n    ];\n    for (const instanceType of instanceTypes) {\n      if (tpDegree <= instanceType.acceleratorChips.neuronxCores) {\n        return instanceType;\n      }\n    }\n    throw new Error(\n      \"This model is too large, I can not support this model current version.\",\n    );\n  }\n}\n"]}
|
|
@@ -14,7 +14,7 @@ class Inferentia2Chips {
|
|
|
14
14
|
}
|
|
15
15
|
exports.Inferentia2Chips = Inferentia2Chips;
|
|
16
16
|
_a = JSII_RTTI_SYMBOL_1;
|
|
17
|
-
Inferentia2Chips[_a] = { fqn: "aws-cdk-neuronx-patterns.Inferentia2Chips", version: "0.0.2" };
|
|
17
|
+
Inferentia2Chips[_a] = { fqn: "aws-cdk-neuronx-patterns.Inferentia2Chips", version: "0.0.4" };
|
|
18
18
|
class NeuronxInstanceType {
|
|
19
19
|
constructor(instanceType, vCpu, memory, acceleratorChips) {
|
|
20
20
|
this.instanceType = instanceType;
|
|
@@ -32,7 +32,7 @@ class NeuronxInstanceType {
|
|
|
32
32
|
}
|
|
33
33
|
exports.NeuronxInstanceType = NeuronxInstanceType;
|
|
34
34
|
_b = JSII_RTTI_SYMBOL_1;
|
|
35
|
-
NeuronxInstanceType[_b] = { fqn: "aws-cdk-neuronx-patterns.NeuronxInstanceType", version: "0.0.2" };
|
|
35
|
+
NeuronxInstanceType[_b] = { fqn: "aws-cdk-neuronx-patterns.NeuronxInstanceType", version: "0.0.4" };
|
|
36
36
|
/**
|
|
37
37
|
* ml.inf2.xlarge
|
|
38
38
|
*/
|