aws-cdk-neuronx-patterns 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.jsii +925 -106
- package/API.md +733 -1
- package/README.md +121 -27
- package/docs/neuronx-compile-architecture.png +0 -0
- package/lib/.types-compat/ts3.9/index.d.ts +2 -0
- package/lib/.types-compat/ts3.9/model.d.ts +97 -0
- package/lib/.types-compat/ts3.9/neuronx-compile.d.ts +15 -92
- package/lib/.types-compat/ts3.9/private/util.d.ts +2 -0
- package/lib/.types-compat/ts3.9/transformers-neuronx-sagemaker-realtime-inference.d.ts +113 -0
- package/lib/index.d.ts +2 -0
- package/lib/index.js +3 -1
- package/lib/model.d.ts +97 -0
- package/lib/model.js +93 -0
- package/lib/neuronx-compile.d.ts +15 -92
- package/lib/neuronx-compile.js +43 -156
- package/lib/neuronx-instance-type.js +2 -2
- package/lib/private/await-compile-job/index.js +2 -2
- package/lib/private/util.d.ts +2 -0
- package/lib/private/util.js +31 -0
- package/lib/transformers-neuronx-sagemaker-realtime-inference.d.ts +113 -0
- package/lib/transformers-neuronx-sagemaker-realtime-inference.js +150 -0
- package/package.json +9 -5
- package/scripts/compile/Dockerfile +10 -0
- package/scripts/compile/entrypoint.sh +9 -0
- package/scripts/inference/transformers-neuronx/Dockerfile +1 -0
- package/scripts/inference/transformers-neuronx/code/inference.py +63 -0
- package/scripts/inference/transformers-neuronx/code/requirements.txt +1 -0
- /package/scripts/{compile.py → compile/compile.py} +0 -0
package/API.md
CHANGED
|
@@ -89,7 +89,14 @@ Any object.
|
|
|
89
89
|
| **Name** | **Type** | **Description** |
|
|
90
90
|
| --- | --- | --- |
|
|
91
91
|
| <code><a href="#aws-cdk-neuronx-patterns.NeuronxCompile.property.node">node</a></code> | <code>constructs.Node</code> | The tree node. |
|
|
92
|
+
| <code><a href="#aws-cdk-neuronx-patterns.NeuronxCompile.property.compiledArtifactS3Bucket">compiledArtifactS3Bucket</a></code> | <code>aws-cdk-lib.aws_s3.IBucket</code> | *No description.* |
|
|
93
|
+
| <code><a href="#aws-cdk-neuronx-patterns.NeuronxCompile.property.compiledArtifactS3Prefix">compiledArtifactS3Prefix</a></code> | <code>string</code> | S3 prefix under which the compiled artifact is uploaded. |
|
|
92
94
|
| <code><a href="#aws-cdk-neuronx-patterns.NeuronxCompile.property.compiledArtifactS3Url">compiledArtifactS3Url</a></code> | <code>string</code> | S3 URL that compiled artifact uploaded. |
|
|
95
|
+
| <code><a href="#aws-cdk-neuronx-patterns.NeuronxCompile.property.nPositions">nPositions</a></code> | <code>number</code> | *No description.* |
|
|
96
|
+
| <code><a href="#aws-cdk-neuronx-patterns.NeuronxCompile.property.optLevel">optLevel</a></code> | <code><a href="#aws-cdk-neuronx-patterns.OptLevel">OptLevel</a></code> | *No description.* |
|
|
97
|
+
| <code><a href="#aws-cdk-neuronx-patterns.NeuronxCompile.property.parameters">parameters</a></code> | <code><a href="#aws-cdk-neuronx-patterns.Parameters">Parameters</a></code> | *No description.* |
|
|
98
|
+
| <code><a href="#aws-cdk-neuronx-patterns.NeuronxCompile.property.tpDegree">tpDegree</a></code> | <code>number</code> | *No description.* |
|
|
99
|
+
| <code><a href="#aws-cdk-neuronx-patterns.NeuronxCompile.property.quantDtype">quantDtype</a></code> | <code><a href="#aws-cdk-neuronx-patterns.QuantDtype">QuantDtype</a></code> | *No description.* |
|
|
93
100
|
|
|
94
101
|
---
|
|
95
102
|
|
|
@@ -105,6 +112,28 @@ The tree node.
|
|
|
105
112
|
|
|
106
113
|
---
|
|
107
114
|
|
|
115
|
+
##### `compiledArtifactS3Bucket`<sup>Required</sup> <a name="compiledArtifactS3Bucket" id="aws-cdk-neuronx-patterns.NeuronxCompile.property.compiledArtifactS3Bucket"></a>
|
|
116
|
+
|
|
117
|
+
```typescript
|
|
118
|
+
public readonly compiledArtifactS3Bucket: IBucket;
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
- *Type:* aws-cdk-lib.aws_s3.IBucket
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
##### `compiledArtifactS3Prefix`<sup>Required</sup> <a name="compiledArtifactS3Prefix" id="aws-cdk-neuronx-patterns.NeuronxCompile.property.compiledArtifactS3Prefix"></a>
|
|
126
|
+
|
|
127
|
+
```typescript
|
|
128
|
+
public readonly compiledArtifactS3Prefix: string;
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
- *Type:* string
|
|
132
|
+
|
|
133
|
+
S3 prefix under which the compiled artifact is uploaded.
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
108
137
|
##### `compiledArtifactS3Url`<sup>Required</sup> <a name="compiledArtifactS3Url" id="aws-cdk-neuronx-patterns.NeuronxCompile.property.compiledArtifactS3Url"></a>
|
|
109
138
|
|
|
110
139
|
```typescript
|
|
@@ -117,9 +146,358 @@ S3 URL that compiled artifact uploaded.
|
|
|
117
146
|
|
|
118
147
|
---
|
|
119
148
|
|
|
149
|
+
##### `nPositions`<sup>Required</sup> <a name="nPositions" id="aws-cdk-neuronx-patterns.NeuronxCompile.property.nPositions"></a>
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
public readonly nPositions: number;
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
- *Type:* number
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
##### `optLevel`<sup>Required</sup> <a name="optLevel" id="aws-cdk-neuronx-patterns.NeuronxCompile.property.optLevel"></a>
|
|
160
|
+
|
|
161
|
+
```typescript
|
|
162
|
+
public readonly optLevel: OptLevel;
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.OptLevel">OptLevel</a>
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
##### `parameters`<sup>Required</sup> <a name="parameters" id="aws-cdk-neuronx-patterns.NeuronxCompile.property.parameters"></a>
|
|
170
|
+
|
|
171
|
+
```typescript
|
|
172
|
+
public readonly parameters: Parameters;
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.Parameters">Parameters</a>
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
##### `tpDegree`<sup>Required</sup> <a name="tpDegree" id="aws-cdk-neuronx-patterns.NeuronxCompile.property.tpDegree"></a>
|
|
180
|
+
|
|
181
|
+
```typescript
|
|
182
|
+
public readonly tpDegree: number;
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
- *Type:* number
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
##### `quantDtype`<sup>Optional</sup> <a name="quantDtype" id="aws-cdk-neuronx-patterns.NeuronxCompile.property.quantDtype"></a>
|
|
190
|
+
|
|
191
|
+
```typescript
|
|
192
|
+
public readonly quantDtype: QuantDtype;
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.QuantDtype">QuantDtype</a>
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
### TransformersNeuronxSageMakerRealtimeInferenceEndpoint <a name="TransformersNeuronxSageMakerRealtimeInferenceEndpoint" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint"></a>
|
|
201
|
+
|
|
202
|
+
#### Initializers <a name="Initializers" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.Initializer"></a>
|
|
203
|
+
|
|
204
|
+
```typescript
|
|
205
|
+
import { TransformersNeuronxSageMakerRealtimeInferenceEndpoint } from 'aws-cdk-neuronx-patterns'
|
|
206
|
+
|
|
207
|
+
new TransformersNeuronxSageMakerRealtimeInferenceEndpoint(scope: Construct, id: string, props: TransformersNeuronxSageMakerRealtimeInferenceEndpointProps)
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
| **Name** | **Type** | **Description** |
|
|
211
|
+
| --- | --- | --- |
|
|
212
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.Initializer.parameter.scope">scope</a></code> | <code>constructs.Construct</code> | *No description.* |
|
|
213
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.Initializer.parameter.id">id</a></code> | <code>string</code> | *No description.* |
|
|
214
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.Initializer.parameter.props">props</a></code> | <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps">TransformersNeuronxSageMakerRealtimeInferenceEndpointProps</a></code> | *No description.* |
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
##### `scope`<sup>Required</sup> <a name="scope" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.Initializer.parameter.scope"></a>
|
|
219
|
+
|
|
220
|
+
- *Type:* constructs.Construct
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
##### `id`<sup>Required</sup> <a name="id" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.Initializer.parameter.id"></a>
|
|
225
|
+
|
|
226
|
+
- *Type:* string
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
##### `props`<sup>Required</sup> <a name="props" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.Initializer.parameter.props"></a>
|
|
231
|
+
|
|
232
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps">TransformersNeuronxSageMakerRealtimeInferenceEndpointProps</a>
|
|
233
|
+
|
|
234
|
+
---
|
|
235
|
+
|
|
236
|
+
#### Methods <a name="Methods" id="Methods"></a>
|
|
237
|
+
|
|
238
|
+
| **Name** | **Description** |
|
|
239
|
+
| --- | --- |
|
|
240
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.toString">toString</a></code> | Returns a string representation of this construct. |
|
|
241
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.grantInvoke">grantInvoke</a></code> | *No description.* |
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
245
|
+
##### `toString` <a name="toString" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.toString"></a>
|
|
246
|
+
|
|
247
|
+
```typescript
|
|
248
|
+
public toString(): string
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
Returns a string representation of this construct.
|
|
252
|
+
|
|
253
|
+
##### `grantInvoke` <a name="grantInvoke" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.grantInvoke"></a>
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
public grantInvoke(grantee: IGrantable): Grant
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
###### `grantee`<sup>Required</sup> <a name="grantee" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.grantInvoke.parameter.grantee"></a>
|
|
260
|
+
|
|
261
|
+
- *Type:* aws-cdk-lib.aws_iam.IGrantable
|
|
262
|
+
|
|
263
|
+
---
|
|
264
|
+
|
|
265
|
+
#### Static Functions <a name="Static Functions" id="Static Functions"></a>
|
|
266
|
+
|
|
267
|
+
| **Name** | **Description** |
|
|
268
|
+
| --- | --- |
|
|
269
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.isConstruct">isConstruct</a></code> | Checks if `x` is a construct. |
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
##### ~~`isConstruct`~~ <a name="isConstruct" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.isConstruct"></a>
|
|
274
|
+
|
|
275
|
+
```typescript
|
|
276
|
+
import { TransformersNeuronxSageMakerRealtimeInferenceEndpoint } from 'aws-cdk-neuronx-patterns'
|
|
277
|
+
|
|
278
|
+
TransformersNeuronxSageMakerRealtimeInferenceEndpoint.isConstruct(x: any)
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
Checks if `x` is a construct.
|
|
282
|
+
|
|
283
|
+
###### `x`<sup>Required</sup> <a name="x" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.isConstruct.parameter.x"></a>
|
|
284
|
+
|
|
285
|
+
- *Type:* any
|
|
286
|
+
|
|
287
|
+
Any object.
|
|
288
|
+
|
|
289
|
+
---
|
|
290
|
+
|
|
291
|
+
#### Properties <a name="Properties" id="Properties"></a>
|
|
292
|
+
|
|
293
|
+
| **Name** | **Type** | **Description** |
|
|
294
|
+
| --- | --- | --- |
|
|
295
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.property.node">node</a></code> | <code>constructs.Node</code> | The tree node. |
|
|
296
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.property.endpointArn">endpointArn</a></code> | <code>string</code> | The ARN of the endpoint. |
|
|
297
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.property.endpointName">endpointName</a></code> | <code>string</code> | The name of the endpoint. |
|
|
298
|
+
|
|
299
|
+
---
|
|
300
|
+
|
|
301
|
+
##### `node`<sup>Required</sup> <a name="node" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.property.node"></a>
|
|
302
|
+
|
|
303
|
+
```typescript
|
|
304
|
+
public readonly node: Node;
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
- *Type:* constructs.Node
|
|
308
|
+
|
|
309
|
+
The tree node.
|
|
310
|
+
|
|
311
|
+
---
|
|
312
|
+
|
|
313
|
+
##### `endpointArn`<sup>Required</sup> <a name="endpointArn" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.property.endpointArn"></a>
|
|
314
|
+
|
|
315
|
+
```typescript
|
|
316
|
+
public readonly endpointArn: string;
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
- *Type:* string
|
|
320
|
+
|
|
321
|
+
The ARN of the endpoint.
|
|
322
|
+
|
|
323
|
+
---
|
|
324
|
+
|
|
325
|
+
##### `endpointName`<sup>Required</sup> <a name="endpointName" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpoint.property.endpointName"></a>
|
|
326
|
+
|
|
327
|
+
```typescript
|
|
328
|
+
public readonly endpointName: string;
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
- *Type:* string
|
|
332
|
+
|
|
333
|
+
The name of the endpoint.
|
|
334
|
+
|
|
335
|
+
---
|
|
336
|
+
|
|
120
337
|
|
|
121
338
|
## Structs <a name="Structs" id="Structs"></a>
|
|
122
339
|
|
|
340
|
+
### BucketCompiledModelOptions <a name="BucketCompiledModelOptions" id="aws-cdk-neuronx-patterns.BucketCompiledModelOptions"></a>
|
|
341
|
+
|
|
342
|
+
#### Initializer <a name="Initializer" id="aws-cdk-neuronx-patterns.BucketCompiledModelOptions.Initializer"></a>
|
|
343
|
+
|
|
344
|
+
```typescript
|
|
345
|
+
import { BucketCompiledModelOptions } from 'aws-cdk-neuronx-patterns'
|
|
346
|
+
|
|
347
|
+
const bucketCompiledModelOptions: BucketCompiledModelOptions = { ... }
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
#### Properties <a name="Properties" id="Properties"></a>
|
|
351
|
+
|
|
352
|
+
| **Name** | **Type** | **Description** |
|
|
353
|
+
| --- | --- | --- |
|
|
354
|
+
| <code><a href="#aws-cdk-neuronx-patterns.BucketCompiledModelOptions.property.code">code</a></code> | <code>aws-cdk-lib.aws_s3_deployment.ISource</code> | Code used for inference. |
|
|
355
|
+
| <code><a href="#aws-cdk-neuronx-patterns.BucketCompiledModelOptions.property.compiledArtifactPath">compiledArtifactPath</a></code> | <code>string</code> | The path where compiled artifacts (i.e. xxx.neff) are stored. |
|
|
356
|
+
| <code><a href="#aws-cdk-neuronx-patterns.BucketCompiledModelOptions.property.compileOptions">compileOptions</a></code> | <code><a href="#aws-cdk-neuronx-patterns.CompileOptions">CompileOptions</a></code> | Neuronx compile options. |
|
|
357
|
+
| <code><a href="#aws-cdk-neuronx-patterns.BucketCompiledModelOptions.property.modelIdOrPath">modelIdOrPath</a></code> | <code>string</code> | Model ID or saved path. |
|
|
358
|
+
| <code><a href="#aws-cdk-neuronx-patterns.BucketCompiledModelOptions.property.parameters">parameters</a></code> | <code><a href="#aws-cdk-neuronx-patterns.Parameters">Parameters</a></code> | The number of parameters of model. |
|
|
359
|
+
|
|
360
|
+
---
|
|
361
|
+
|
|
362
|
+
##### `code`<sup>Optional</sup> <a name="code" id="aws-cdk-neuronx-patterns.BucketCompiledModelOptions.property.code"></a>
|
|
363
|
+
|
|
364
|
+
```typescript
|
|
365
|
+
public readonly code: ISource;
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
- *Type:* aws-cdk-lib.aws_s3_deployment.ISource
|
|
369
|
+
- *Default:* using the predefined code
|
|
370
|
+
|
|
371
|
+
Code used for inference.
|
|
372
|
+
|
|
373
|
+
---
|
|
374
|
+
|
|
375
|
+
##### `compiledArtifactPath`<sup>Optional</sup> <a name="compiledArtifactPath" id="aws-cdk-neuronx-patterns.BucketCompiledModelOptions.property.compiledArtifactPath"></a>
|
|
376
|
+
|
|
377
|
+
```typescript
|
|
378
|
+
public readonly compiledArtifactPath: string;
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
- *Type:* string
|
|
382
|
+
- *Default:* "./compiled"
|
|
383
|
+
|
|
384
|
+
The path where compiled artifacts (i.e. xxx.neff) are stored.
|
|
385
|
+
|
|
386
|
+
---
|
|
387
|
+
|
|
388
|
+
##### `compileOptions`<sup>Optional</sup> <a name="compileOptions" id="aws-cdk-neuronx-patterns.BucketCompiledModelOptions.property.compileOptions"></a>
|
|
389
|
+
|
|
390
|
+
```typescript
|
|
391
|
+
public readonly compileOptions: CompileOptions;
|
|
392
|
+
```
|
|
393
|
+
|
|
394
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.CompileOptions">CompileOptions</a>
|
|
395
|
+
- *Default:* Each property is set to its default value.
|
|
396
|
+
|
|
397
|
+
Neuronx compile options.
|
|
398
|
+
|
|
399
|
+
---
|
|
400
|
+
|
|
401
|
+
##### `modelIdOrPath`<sup>Optional</sup> <a name="modelIdOrPath" id="aws-cdk-neuronx-patterns.BucketCompiledModelOptions.property.modelIdOrPath"></a>
|
|
402
|
+
|
|
403
|
+
```typescript
|
|
404
|
+
public readonly modelIdOrPath: string;
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
- *Type:* string
|
|
408
|
+
- *Default:* "./model"
|
|
409
|
+
|
|
410
|
+
Model ID or saved path.
|
|
411
|
+
|
|
412
|
+
---
|
|
413
|
+
|
|
414
|
+
##### `parameters`<sup>Required</sup> <a name="parameters" id="aws-cdk-neuronx-patterns.BucketCompiledModelOptions.property.parameters"></a>
|
|
415
|
+
|
|
416
|
+
```typescript
|
|
417
|
+
public readonly parameters: Parameters;
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.Parameters">Parameters</a>
|
|
421
|
+
|
|
422
|
+
The number of parameters of the model.
|
|
423
|
+
|
|
424
|
+
---
|
|
425
|
+
|
|
426
|
+
### CompiledModelOptions <a name="CompiledModelOptions" id="aws-cdk-neuronx-patterns.CompiledModelOptions"></a>
|
|
427
|
+
|
|
428
|
+
Precompiled model options.
|
|
429
|
+
|
|
430
|
+
#### Initializer <a name="Initializer" id="aws-cdk-neuronx-patterns.CompiledModelOptions.Initializer"></a>
|
|
431
|
+
|
|
432
|
+
```typescript
|
|
433
|
+
import { CompiledModelOptions } from 'aws-cdk-neuronx-patterns'
|
|
434
|
+
|
|
435
|
+
const compiledModelOptions: CompiledModelOptions = { ... }
|
|
436
|
+
```
|
|
437
|
+
|
|
438
|
+
#### Properties <a name="Properties" id="Properties"></a>
|
|
439
|
+
|
|
440
|
+
| **Name** | **Type** | **Description** |
|
|
441
|
+
| --- | --- | --- |
|
|
442
|
+
| <code><a href="#aws-cdk-neuronx-patterns.CompiledModelOptions.property.code">code</a></code> | <code>aws-cdk-lib.aws_s3_deployment.ISource</code> | Code used for inference. |
|
|
443
|
+
| <code><a href="#aws-cdk-neuronx-patterns.CompiledModelOptions.property.compiledArtifactPath">compiledArtifactPath</a></code> | <code>string</code> | The path where compiled artifacts (i.e. xxx.neff) are stored. |
|
|
444
|
+
| <code><a href="#aws-cdk-neuronx-patterns.CompiledModelOptions.property.compileOptions">compileOptions</a></code> | <code><a href="#aws-cdk-neuronx-patterns.CompileOptions">CompileOptions</a></code> | Neuronx compile options. |
|
|
445
|
+
| <code><a href="#aws-cdk-neuronx-patterns.CompiledModelOptions.property.modelIdOrPath">modelIdOrPath</a></code> | <code>string</code> | Model ID or saved path. |
|
|
446
|
+
|
|
447
|
+
---
|
|
448
|
+
|
|
449
|
+
##### `code`<sup>Optional</sup> <a name="code" id="aws-cdk-neuronx-patterns.CompiledModelOptions.property.code"></a>
|
|
450
|
+
|
|
451
|
+
```typescript
|
|
452
|
+
public readonly code: ISource;
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
- *Type:* aws-cdk-lib.aws_s3_deployment.ISource
|
|
456
|
+
- *Default:* using the predefined code
|
|
457
|
+
|
|
458
|
+
Code used for inference.
|
|
459
|
+
|
|
460
|
+
---
|
|
461
|
+
|
|
462
|
+
##### `compiledArtifactPath`<sup>Optional</sup> <a name="compiledArtifactPath" id="aws-cdk-neuronx-patterns.CompiledModelOptions.property.compiledArtifactPath"></a>
|
|
463
|
+
|
|
464
|
+
```typescript
|
|
465
|
+
public readonly compiledArtifactPath: string;
|
|
466
|
+
```
|
|
467
|
+
|
|
468
|
+
- *Type:* string
|
|
469
|
+
- *Default:* "./compiled"
|
|
470
|
+
|
|
471
|
+
The path where compiled artifacts (i.e. xxx.neff) are stored.
|
|
472
|
+
|
|
473
|
+
---
|
|
474
|
+
|
|
475
|
+
##### `compileOptions`<sup>Optional</sup> <a name="compileOptions" id="aws-cdk-neuronx-patterns.CompiledModelOptions.property.compileOptions"></a>
|
|
476
|
+
|
|
477
|
+
```typescript
|
|
478
|
+
public readonly compileOptions: CompileOptions;
|
|
479
|
+
```
|
|
480
|
+
|
|
481
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.CompileOptions">CompileOptions</a>
|
|
482
|
+
- *Default:* Each property is set to its default value.
|
|
483
|
+
|
|
484
|
+
Neuronx compile options.
|
|
485
|
+
|
|
486
|
+
---
|
|
487
|
+
|
|
488
|
+
##### `modelIdOrPath`<sup>Optional</sup> <a name="modelIdOrPath" id="aws-cdk-neuronx-patterns.CompiledModelOptions.property.modelIdOrPath"></a>
|
|
489
|
+
|
|
490
|
+
```typescript
|
|
491
|
+
public readonly modelIdOrPath: string;
|
|
492
|
+
```
|
|
493
|
+
|
|
494
|
+
- *Type:* string
|
|
495
|
+
- *Default:* "./model"
|
|
496
|
+
|
|
497
|
+
Model ID or saved path.
|
|
498
|
+
|
|
499
|
+
---
|
|
500
|
+
|
|
123
501
|
### CompileOptions <a name="CompileOptions" id="aws-cdk-neuronx-patterns.CompileOptions"></a>
|
|
124
502
|
|
|
125
503
|
Compile options.
|
|
@@ -150,7 +528,7 @@ public readonly nPositions: number;
|
|
|
150
528
|
```
|
|
151
529
|
|
|
152
530
|
- *Type:* number
|
|
153
|
-
- *Default:*
|
|
531
|
+
- *Default:* 4096
|
|
154
532
|
|
|
155
533
|
---
|
|
156
534
|
|
|
@@ -405,6 +783,127 @@ The VPC Subnets this Compute Environment will launch instances in.
|
|
|
405
783
|
|
|
406
784
|
---
|
|
407
785
|
|
|
786
|
+
### TransformersNeuronxSageMakerRealtimeInferenceEndpointProps <a name="TransformersNeuronxSageMakerRealtimeInferenceEndpointProps" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps"></a>
|
|
787
|
+
|
|
788
|
+
#### Initializer <a name="Initializer" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.Initializer"></a>
|
|
789
|
+
|
|
790
|
+
```typescript
|
|
791
|
+
import { TransformersNeuronxSageMakerRealtimeInferenceEndpointProps } from 'aws-cdk-neuronx-patterns'
|
|
792
|
+
|
|
793
|
+
const transformersNeuronxSageMakerRealtimeInferenceEndpointProps: TransformersNeuronxSageMakerRealtimeInferenceEndpointProps = { ... }
|
|
794
|
+
```
|
|
795
|
+
|
|
796
|
+
#### Properties <a name="Properties" id="Properties"></a>
|
|
797
|
+
|
|
798
|
+
| **Name** | **Type** | **Description** |
|
|
799
|
+
| --- | --- | --- |
|
|
800
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.modelData">modelData</a></code> | <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData">TransformersNeuronxSageMakerInferenceModelData</a></code> | Model data for SageMaker inference. |
|
|
801
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.containerStartupHealthCheckTimeout">containerStartupHealthCheckTimeout</a></code> | <code>aws-cdk-lib.Duration</code> | The timeout value, for your inference container to pass health check by SageMaker Hosting. |
|
|
802
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.environment">environment</a></code> | <code>{[ key: string ]: string}</code> | A map of environment variables to pass into the container. |
|
|
803
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.image">image</a></code> | <code>@aws-cdk/aws-sagemaker-alpha.ContainerImage</code> | An image of the container where the inference job is executed. |
|
|
804
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.instanceType">instanceType</a></code> | <code><a href="#aws-cdk-neuronx-patterns.NeuronxInstanceType">NeuronxInstanceType</a></code> | The instance type of compile worker instance. |
|
|
805
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.modelDataDownloadTimeout">modelDataDownloadTimeout</a></code> | <code>aws-cdk-lib.Duration</code> | The timeout value, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. |
|
|
806
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.volumeSize">volumeSize</a></code> | <code>aws-cdk-lib.Size</code> | The size, of the ML storage volume attached to individual inference instance associated with the production variant. |
|
|
807
|
+
|
|
808
|
+
---
|
|
809
|
+
|
|
810
|
+
##### `modelData`<sup>Required</sup> <a name="modelData" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.modelData"></a>
|
|
811
|
+
|
|
812
|
+
```typescript
|
|
813
|
+
public readonly modelData: TransformersNeuronxSageMakerInferenceModelData;
|
|
814
|
+
```
|
|
815
|
+
|
|
816
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData">TransformersNeuronxSageMakerInferenceModelData</a>
|
|
817
|
+
|
|
818
|
+
Model data for SageMaker inference.
|
|
819
|
+
|
|
820
|
+
The model data requires at least compiled artifacts.
|
|
821
|
+
|
|
822
|
+
---
|
|
823
|
+
|
|
824
|
+
##### `containerStartupHealthCheckTimeout`<sup>Optional</sup> <a name="containerStartupHealthCheckTimeout" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.containerStartupHealthCheckTimeout"></a>
|
|
825
|
+
|
|
826
|
+
```typescript
|
|
827
|
+
public readonly containerStartupHealthCheckTimeout: Duration;
|
|
828
|
+
```
|
|
829
|
+
|
|
830
|
+
- *Type:* aws-cdk-lib.Duration
|
|
831
|
+
- *Default:* 60 seconds; if `modelDataDownloadTimeout` is set, the same value is used (max 60 minutes)
|
|
832
|
+
|
|
833
|
+
The timeout value, for your inference container to pass health check by SageMaker Hosting.
|
|
834
|
+
|
|
835
|
+
> [https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-inference-code.html#your-algorithms-inference-algo-ping-requests)
|
|
836
|
+
|
|
837
|
+
---
|
|
838
|
+
|
|
839
|
+
##### `environment`<sup>Optional</sup> <a name="environment" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.environment"></a>
|
|
840
|
+
|
|
841
|
+
```typescript
|
|
842
|
+
public readonly environment: {[ key: string ]: string};
|
|
843
|
+
```
|
|
844
|
+
|
|
845
|
+
- *Type:* {[ key: string ]: string}
|
|
846
|
+
- *Default:* Only the predefined environment variables required to use Neuronx have been set.
|
|
847
|
+
|
|
848
|
+
A map of environment variables to pass into the container.
|
|
849
|
+
|
|
850
|
+
---
|
|
851
|
+
|
|
852
|
+
##### `image`<sup>Optional</sup> <a name="image" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.image"></a>
|
|
853
|
+
|
|
854
|
+
```typescript
|
|
855
|
+
public readonly image: ContainerImage;
|
|
856
|
+
```
|
|
857
|
+
|
|
858
|
+
- *Type:* @aws-cdk/aws-sagemaker-alpha.ContainerImage
|
|
859
|
+
|
|
860
|
+
An image of the container where the inference job is executed.
|
|
861
|
+
|
|
862
|
+
---
|
|
863
|
+
|
|
864
|
+
##### `instanceType`<sup>Optional</sup> <a name="instanceType" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.instanceType"></a>
|
|
865
|
+
|
|
866
|
+
```typescript
|
|
867
|
+
public readonly instanceType: NeuronxInstanceType;
|
|
868
|
+
```
|
|
869
|
+
|
|
870
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.NeuronxInstanceType">NeuronxInstanceType</a>
|
|
871
|
+
- *Default:* It is determined automatically according to the number of model parameters and compilation options.
|
|
872
|
+
|
|
873
|
+
The instance type of compile worker instance.
|
|
874
|
+
|
|
875
|
+
---
|
|
876
|
+
|
|
877
|
+
##### `modelDataDownloadTimeout`<sup>Optional</sup> <a name="modelDataDownloadTimeout" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.modelDataDownloadTimeout"></a>
|
|
878
|
+
|
|
879
|
+
```typescript
|
|
880
|
+
public readonly modelDataDownloadTimeout: Duration;
|
|
881
|
+
```
|
|
882
|
+
|
|
883
|
+
- *Type:* aws-cdk-lib.Duration
|
|
884
|
+
- *Default:* 60 seconds; when `volumeSize` is larger than 30 GB, 15 seconds per GB (max 60 minutes)
|
|
885
|
+
|
|
886
|
+
The timeout value, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant.
|
|
887
|
+
|
|
888
|
+
---
|
|
889
|
+
|
|
890
|
+
##### `volumeSize`<sup>Optional</sup> <a name="volumeSize" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerRealtimeInferenceEndpointProps.property.volumeSize"></a>
|
|
891
|
+
|
|
892
|
+
```typescript
|
|
893
|
+
public readonly volumeSize: Size;
|
|
894
|
+
```
|
|
895
|
+
|
|
896
|
+
- *Type:* aws-cdk-lib.Size
|
|
897
|
+
- *Default:* 2.5 GB per billion parameters (max 512 GB)
|
|
898
|
+
|
|
899
|
+
The size of the ML storage volume attached to each individual inference instance associated with the production variant.
|
|
900
|
+
|
|
901
|
+
Currently only Amazon EBS gp2 storage volumes are supported.
|
|
902
|
+
|
|
903
|
+
> [https://aws.amazon.com/jp/releasenotes/host-instance-storage-volumes-table](https://aws.amazon.com/jp/releasenotes/host-instance-storage-volumes-table)
|
|
904
|
+
|
|
905
|
+
---
|
|
906
|
+
|
|
408
907
|
## Classes <a name="Classes" id="Classes"></a>
|
|
409
908
|
|
|
410
909
|
### Inferentia2Chips <a name="Inferentia2Chips" id="aws-cdk-neuronx-patterns.Inferentia2Chips"></a>
|
|
@@ -483,10 +982,45 @@ Compile target model.
|
|
|
483
982
|
|
|
484
983
|
| **Name** | **Description** |
|
|
485
984
|
| --- | --- |
|
|
985
|
+
| <code><a href="#aws-cdk-neuronx-patterns.Model.fromBucket">fromBucket</a></code> | Model information for a model stored in an S3 bucket. |
|
|
486
986
|
| <code><a href="#aws-cdk-neuronx-patterns.Model.fromHuggingFace">fromHuggingFace</a></code> | model informations at HuggingFace. |
|
|
487
987
|
|
|
488
988
|
---
|
|
489
989
|
|
|
990
|
+
##### `fromBucket` <a name="fromBucket" id="aws-cdk-neuronx-patterns.Model.fromBucket"></a>
|
|
991
|
+
|
|
992
|
+
```typescript
|
|
993
|
+
import { Model } from 'aws-cdk-neuronx-patterns'
|
|
994
|
+
|
|
995
|
+
Model.fromBucket(bucket: IBucket, prefix: string, options: ModelOptions)
|
|
996
|
+
```
|
|
997
|
+
|
|
998
|
+
Model information for a model stored in an S3 bucket.
|
|
999
|
+
|
|
1000
|
+
###### `bucket`<sup>Required</sup> <a name="bucket" id="aws-cdk-neuronx-patterns.Model.fromBucket.parameter.bucket"></a>
|
|
1001
|
+
|
|
1002
|
+
- *Type:* aws-cdk-lib.aws_s3.IBucket
|
|
1003
|
+
|
|
1004
|
+
The S3 bucket where the model is stored.
|
|
1005
|
+
|
|
1006
|
+
---
|
|
1007
|
+
|
|
1008
|
+
###### `prefix`<sup>Required</sup> <a name="prefix" id="aws-cdk-neuronx-patterns.Model.fromBucket.parameter.prefix"></a>
|
|
1009
|
+
|
|
1010
|
+
- *Type:* string
|
|
1011
|
+
|
|
1012
|
+
The prefix of the S3 objects where the model is stored.
|
|
1013
|
+
|
|
1014
|
+
---
|
|
1015
|
+
|
|
1016
|
+
###### `options`<sup>Required</sup> <a name="options" id="aws-cdk-neuronx-patterns.Model.fromBucket.parameter.options"></a>
|
|
1017
|
+
|
|
1018
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.ModelOptions">ModelOptions</a>
|
|
1019
|
+
|
|
1020
|
+
Basic model information.
|
|
1021
|
+
|
|
1022
|
+
---
|
|
1023
|
+
|
|
490
1024
|
##### `fromHuggingFace` <a name="fromHuggingFace" id="aws-cdk-neuronx-patterns.Model.fromHuggingFace"></a>
|
|
491
1025
|
|
|
492
1026
|
```typescript
|
|
@@ -519,6 +1053,8 @@ model basic infromation.
|
|
|
519
1053
|
| --- | --- | --- |
|
|
520
1054
|
| <code><a href="#aws-cdk-neuronx-patterns.Model.property.modelId">modelId</a></code> | <code>string</code> | *No description.* |
|
|
521
1055
|
| <code><a href="#aws-cdk-neuronx-patterns.Model.property.options">options</a></code> | <code><a href="#aws-cdk-neuronx-patterns.ModelOptions">ModelOptions</a></code> | *No description.* |
|
|
1056
|
+
| <code><a href="#aws-cdk-neuronx-patterns.Model.property.bucket">bucket</a></code> | <code>aws-cdk-lib.aws_s3.IBucket</code> | *No description.* |
|
|
1057
|
+
| <code><a href="#aws-cdk-neuronx-patterns.Model.property.prefix">prefix</a></code> | <code>string</code> | *No description.* |
|
|
522
1058
|
|
|
523
1059
|
---
|
|
524
1060
|
|
|
@@ -542,6 +1078,26 @@ public readonly options: ModelOptions;
|
|
|
542
1078
|
|
|
543
1079
|
---
|
|
544
1080
|
|
|
1081
|
+
##### `bucket`<sup>Optional</sup> <a name="bucket" id="aws-cdk-neuronx-patterns.Model.property.bucket"></a>
|
|
1082
|
+
|
|
1083
|
+
```typescript
|
|
1084
|
+
public readonly bucket: IBucket;
|
|
1085
|
+
```
|
|
1086
|
+
|
|
1087
|
+
- *Type:* aws-cdk-lib.aws_s3.IBucket
|
|
1088
|
+
|
|
1089
|
+
---
|
|
1090
|
+
|
|
1091
|
+
##### `prefix`<sup>Optional</sup> <a name="prefix" id="aws-cdk-neuronx-patterns.Model.property.prefix"></a>
|
|
1092
|
+
|
|
1093
|
+
```typescript
|
|
1094
|
+
public readonly prefix: string;
|
|
1095
|
+
```
|
|
1096
|
+
|
|
1097
|
+
- *Type:* string
|
|
1098
|
+
|
|
1099
|
+
---
|
|
1100
|
+
|
|
545
1101
|
|
|
546
1102
|
### NeuronxInstanceType <a name="NeuronxInstanceType" id="aws-cdk-neuronx-patterns.NeuronxInstanceType"></a>
|
|
547
1103
|
|
|
@@ -720,6 +1276,182 @@ number of parameters bilionX.
|
|
|
720
1276
|
|
|
721
1277
|
|
|
722
1278
|
|
|
1279
|
+
### TransformersNeuronxSageMakerInferenceModelData <a name="TransformersNeuronxSageMakerInferenceModelData" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData"></a>
|
|
1280
|
+
|
|
1281
|
+
|
|
1282
|
+
#### Static Functions <a name="Static Functions" id="Static Functions"></a>
|
|
1283
|
+
|
|
1284
|
+
| **Name** | **Description** |
|
|
1285
|
+
| --- | --- |
|
|
1286
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.fromBucket">fromBucket</a></code> | *No description.* |
|
|
1287
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.fromNeuronxCompile">fromNeuronxCompile</a></code> | *No description.* |
|
|
1288
|
+
|
|
1289
|
+
---
|
|
1290
|
+
|
|
1291
|
+
##### `fromBucket` <a name="fromBucket" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.fromBucket"></a>
|
|
1292
|
+
|
|
1293
|
+
```typescript
|
|
1294
|
+
import { TransformersNeuronxSageMakerInferenceModelData } from 'aws-cdk-neuronx-patterns'
|
|
1295
|
+
|
|
1296
|
+
TransformersNeuronxSageMakerInferenceModelData.fromBucket(bucket: IBucket, prefix: string, options: BucketCompiledModelOptions)
|
|
1297
|
+
```
|
|
1298
|
+
|
|
1299
|
+
###### `bucket`<sup>Required</sup> <a name="bucket" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.fromBucket.parameter.bucket"></a>
|
|
1300
|
+
|
|
1301
|
+
- *Type:* aws-cdk-lib.aws_s3.IBucket
|
|
1302
|
+
|
|
1303
|
+
---
|
|
1304
|
+
|
|
1305
|
+
###### `prefix`<sup>Required</sup> <a name="prefix" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.fromBucket.parameter.prefix"></a>
|
|
1306
|
+
|
|
1307
|
+
- *Type:* string
|
|
1308
|
+
|
|
1309
|
+
---
|
|
1310
|
+
|
|
1311
|
+
###### `options`<sup>Required</sup> <a name="options" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.fromBucket.parameter.options"></a>
|
|
1312
|
+
|
|
1313
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.BucketCompiledModelOptions">BucketCompiledModelOptions</a>
|
|
1314
|
+
|
|
1315
|
+
---
|
|
1316
|
+
|
|
1317
|
+
##### `fromNeuronxCompile` <a name="fromNeuronxCompile" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.fromNeuronxCompile"></a>
|
|
1318
|
+
|
|
1319
|
+
```typescript
|
|
1320
|
+
import { TransformersNeuronxSageMakerInferenceModelData } from 'aws-cdk-neuronx-patterns'
|
|
1321
|
+
|
|
1322
|
+
TransformersNeuronxSageMakerInferenceModelData.fromNeuronxCompile(compile: NeuronxCompile, code?: ISource)
|
|
1323
|
+
```
|
|
1324
|
+
|
|
1325
|
+
###### `compile`<sup>Required</sup> <a name="compile" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.fromNeuronxCompile.parameter.compile"></a>
|
|
1326
|
+
|
|
1327
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.NeuronxCompile">NeuronxCompile</a>
|
|
1328
|
+
|
|
1329
|
+
---
|
|
1330
|
+
|
|
1331
|
+
###### `code`<sup>Optional</sup> <a name="code" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.fromNeuronxCompile.parameter.code"></a>
|
|
1332
|
+
|
|
1333
|
+
- *Type:* aws-cdk-lib.aws_s3_deployment.ISource
|
|
1334
|
+
|
|
1335
|
+
---
|
|
1336
|
+
|
|
1337
|
+
#### Properties <a name="Properties" id="Properties"></a>
|
|
1338
|
+
|
|
1339
|
+
| **Name** | **Type** | **Description** |
|
|
1340
|
+
| --- | --- | --- |
|
|
1341
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.bucket">bucket</a></code> | <code>aws-cdk-lib.aws_s3.IBucket</code> | *No description.* |
|
|
1342
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.code">code</a></code> | <code>aws-cdk-lib.aws_s3_deployment.ISource</code> | *No description.* |
|
|
1343
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.compiledArtifactS3Prefix">compiledArtifactS3Prefix</a></code> | <code>string</code> | *No description.* |
|
|
1344
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.nPositions">nPositions</a></code> | <code>number</code> | *No description.* |
|
|
1345
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.optLevel">optLevel</a></code> | <code><a href="#aws-cdk-neuronx-patterns.OptLevel">OptLevel</a></code> | *No description.* |
|
|
1346
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.parameters">parameters</a></code> | <code><a href="#aws-cdk-neuronx-patterns.Parameters">Parameters</a></code> | *No description.* |
|
|
1347
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.tpDegree">tpDegree</a></code> | <code>number</code> | *No description.* |
|
|
1348
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.compiledArtifactPath">compiledArtifactPath</a></code> | <code>string</code> | *No description.* |
|
|
1349
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.modelIdOrPath">modelIdOrPath</a></code> | <code>string</code> | *No description.* |
|
|
1350
|
+
| <code><a href="#aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.quantDtype">quantDtype</a></code> | <code><a href="#aws-cdk-neuronx-patterns.QuantDtype">QuantDtype</a></code> | *No description.* |
|
|
1351
|
+
|
|
1352
|
+
---
|
|
1353
|
+
|
|
1354
|
+
##### `bucket`<sup>Required</sup> <a name="bucket" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.bucket"></a>
|
|
1355
|
+
|
|
1356
|
+
```typescript
|
|
1357
|
+
public readonly bucket: IBucket;
|
|
1358
|
+
```
|
|
1359
|
+
|
|
1360
|
+
- *Type:* aws-cdk-lib.aws_s3.IBucket
|
|
1361
|
+
|
|
1362
|
+
---
|
|
1363
|
+
|
|
1364
|
+
##### `code`<sup>Required</sup> <a name="code" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.code"></a>
|
|
1365
|
+
|
|
1366
|
+
```typescript
|
|
1367
|
+
public readonly code: ISource;
|
|
1368
|
+
```
|
|
1369
|
+
|
|
1370
|
+
- *Type:* aws-cdk-lib.aws_s3_deployment.ISource
|
|
1371
|
+
|
|
1372
|
+
---
|
|
1373
|
+
|
|
1374
|
+
##### `compiledArtifactS3Prefix`<sup>Required</sup> <a name="compiledArtifactS3Prefix" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.compiledArtifactS3Prefix"></a>
|
|
1375
|
+
|
|
1376
|
+
```typescript
|
|
1377
|
+
public readonly compiledArtifactS3Prefix: string;
|
|
1378
|
+
```
|
|
1379
|
+
|
|
1380
|
+
- *Type:* string
|
|
1381
|
+
|
|
1382
|
+
---
|
|
1383
|
+
|
|
1384
|
+
##### `nPositions`<sup>Required</sup> <a name="nPositions" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.nPositions"></a>
|
|
1385
|
+
|
|
1386
|
+
```typescript
|
|
1387
|
+
public readonly nPositions: number;
|
|
1388
|
+
```
|
|
1389
|
+
|
|
1390
|
+
- *Type:* number
|
|
1391
|
+
|
|
1392
|
+
---
|
|
1393
|
+
|
|
1394
|
+
##### `optLevel`<sup>Required</sup> <a name="optLevel" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.optLevel"></a>
|
|
1395
|
+
|
|
1396
|
+
```typescript
|
|
1397
|
+
public readonly optLevel: OptLevel;
|
|
1398
|
+
```
|
|
1399
|
+
|
|
1400
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.OptLevel">OptLevel</a>
|
|
1401
|
+
|
|
1402
|
+
---
|
|
1403
|
+
|
|
1404
|
+
##### `parameters`<sup>Required</sup> <a name="parameters" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.parameters"></a>
|
|
1405
|
+
|
|
1406
|
+
```typescript
|
|
1407
|
+
public readonly parameters: Parameters;
|
|
1408
|
+
```
|
|
1409
|
+
|
|
1410
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.Parameters">Parameters</a>
|
|
1411
|
+
|
|
1412
|
+
---
|
|
1413
|
+
|
|
1414
|
+
##### `tpDegree`<sup>Required</sup> <a name="tpDegree" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.tpDegree"></a>
|
|
1415
|
+
|
|
1416
|
+
```typescript
|
|
1417
|
+
public readonly tpDegree: number;
|
|
1418
|
+
```
|
|
1419
|
+
|
|
1420
|
+
- *Type:* number
|
|
1421
|
+
|
|
1422
|
+
---
|
|
1423
|
+
|
|
1424
|
+
##### `compiledArtifactPath`<sup>Optional</sup> <a name="compiledArtifactPath" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.compiledArtifactPath"></a>
|
|
1425
|
+
|
|
1426
|
+
```typescript
|
|
1427
|
+
public readonly compiledArtifactPath: string;
|
|
1428
|
+
```
|
|
1429
|
+
|
|
1430
|
+
- *Type:* string
|
|
1431
|
+
|
|
1432
|
+
---
|
|
1433
|
+
|
|
1434
|
+
##### `modelIdOrPath`<sup>Optional</sup> <a name="modelIdOrPath" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.modelIdOrPath"></a>
|
|
1435
|
+
|
|
1436
|
+
```typescript
|
|
1437
|
+
public readonly modelIdOrPath: string;
|
|
1438
|
+
```
|
|
1439
|
+
|
|
1440
|
+
- *Type:* string
|
|
1441
|
+
|
|
1442
|
+
---
|
|
1443
|
+
|
|
1444
|
+
##### `quantDtype`<sup>Optional</sup> <a name="quantDtype" id="aws-cdk-neuronx-patterns.TransformersNeuronxSageMakerInferenceModelData.property.quantDtype"></a>
|
|
1445
|
+
|
|
1446
|
+
```typescript
|
|
1447
|
+
public readonly quantDtype: QuantDtype;
|
|
1448
|
+
```
|
|
1449
|
+
|
|
1450
|
+
- *Type:* <a href="#aws-cdk-neuronx-patterns.QuantDtype">QuantDtype</a>
|
|
1451
|
+
|
|
1452
|
+
---
|
|
1453
|
+
|
|
1454
|
+
|
|
723
1455
|
## Protocols <a name="Protocols" id="Protocols"></a>
|
|
724
1456
|
|
|
725
1457
|
### IAcceleratorChips <a name="IAcceleratorChips" id="aws-cdk-neuronx-patterns.IAcceleratorChips"></a>
|