@daocloud-proto/baize 0.103.3 → 0.104.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/common/k8s.pb.ts +74 -0
- package/management-api/job/v1alpha1/common.pb.ts +15 -1
- package/management-api/job/v1alpha1/job.pb.ts +2 -0
- package/management-api/pod/v1alpha1/pod.pb.ts +1 -1
- package/management-api/queue/v1alpha1/queue.pb.ts +7 -0
- package/management-api/serving/triton/model_config.pb.ts +368 -0
- package/management-api/serving/v1alpha1/serving.pb.ts +212 -0
- package/package.json +1 -1
package/common/k8s.pb.ts
CHANGED
@@ -25,4 +25,78 @@ export type KubeVolume = {
   name?: string
   mountPath?: string
   readOnly?: boolean
+}
+
+export type PodConfig = {
+  kubeEnvs?: KubeEnv[]
+  kubeVolumes?: KubeVolume[]
+  resources?: Resources
+  affinity?: Affinity
+  schedulerName?: string
+  priorityClass?: string
+  queue?: string
+}
+
+export type Affinity = {
+  nodeAffinity?: NodeAffinity
+  podAffinity?: PodAffinity
+  podAntiAffinity?: PodAntiAffinity
+}
+
+export type PodAntiAffinity = {
+  requiredDuringSchedulingIgnoredDuringExecution?: PodAffinityTerm[]
+  preferredDuringSchedulingIgnoredDuringExecution?: WeightedPodAffinityTerm[]
+}
+
+export type PodAffinityTerm = {
+  labelSelector?: LabelSelector
+  namespaces?: string[]
+  topologyKey?: string
+  namespaceSelector?: LabelSelector
+}
+
+export type LabelSelector = {
+  matchLabels?: {[key: string]: string}
+  matchExpressions?: LabelSelectorRequirement[]
+}
+
+export type WeightedPodAffinityTerm = {
+  weight?: number
+  podAffinityTerm?: PodAffinityTerm
+}
+
+export type PodAffinity = {
+  requiredDuringSchedulingIgnoredDuringExecution?: PodAffinityTerm[]
+  preferredDuringSchedulingIgnoredDuringExecution?: WeightedPodAffinityTerm[]
+}
+
+export type NodeAffinity = {
+  requiredDuringSchedulingIgnoredDuringExecution?: NodeSelector
+  preferredDuringSchedulingIgnoredDuringExecution?: PreferredSchedulingTerm[]
+}
+
+export type NodeSelector = {
+  nodeSelectorTerms?: NodeSelectorTerm[]
+}
+
+export type PreferredSchedulingTerm = {
+  weight?: number
+  preference?: NodeSelectorTerm
+}
+
+export type NodeSelectorTerm = {
+  matchExpressions?: NodeSelectorRequirement[]
+  matchFields?: NodeSelectorRequirement[]
+}
+
+export type NodeSelectorRequirement = {
+  key?: string
+  operator?: string
+  values?: string[]
+}
+
+export type LabelSelectorRequirement = {
+  key?: string
+  operator?: string
+  values?: string[]
 }
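The new affinity types mirror the Kubernetes pod/node affinity API surface. A minimal sketch of how a consumer might populate the new `PodConfig` (the import path follows the package file layout; the scheduler name, queue name, and node label below are placeholders, not values defined by this package):

```ts
import type { PodConfig } from "@daocloud-proto/baize/common/k8s.pb"

// Pin pods to GPU nodes via the new Affinity/NodeAffinity types.
const podConfig: PodConfig = {
  schedulerName: "volcano",   // placeholder scheduler name
  queue: "default",           // placeholder queue name
  affinity: {
    nodeAffinity: {
      requiredDuringSchedulingIgnoredDuringExecution: {
        nodeSelectorTerms: [
          { matchExpressions: [{ key: "nvidia.com/gpu.present", operator: "In", values: ["true"] }] },
        ],
      },
    },
  },
}
```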
package/management-api/job/v1alpha1/common.pb.ts
CHANGED

@@ -6,6 +6,15 @@
 
 import * as BaizeCommonK8s from "../../../common/k8s.pb"
 
+type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
+type OneOf<T> =
+  | { [k in keyof T]?: undefined }
+  | (
+    keyof T extends infer K ?
+      (K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
+        : never)
+    : never);
+
 export enum QueueType {
   QUEUE_TYPE_UNSPECIFIED = "QUEUE_TYPE_UNSPECIFIED",
   KUEUE = "KUEUE",
@@ -32,7 +41,8 @@ export type CommonConfig = {
   description?: string
 }
 
-export type JobCreationBaseConfig = {
+
+/* baize modified */ export type BaseJobCreationBaseConfig = {
   image?: string
   imagePullSecret?: string
   command?: string[]
@@ -41,8 +51,12 @@ export type JobCreationBaseConfig = {
   annotations?: {[key: string]: string}
   kubeEnvs?: BaizeCommonK8s.KubeEnv[]
   kubeVolumes?: BaizeCommonK8s.KubeVolume[]
+  affinity?: BaizeCommonK8s.Affinity
 }
 
+export type JobCreationBaseConfig = BaseJobCreationBaseConfig
+  & OneOf<{ tolerationSeconds: string }>
+
 export type JobRoleDifferenceConfig = {
   replicas?: number
   resources?: BaizeCommonK8s.Resources
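`JobCreationBaseConfig` is now the intersection of the renamed `BaseJobCreationBaseConfig` and a single-member `OneOf`, so `tolerationSeconds` (a string, presumably because it maps an int64 field) may be set or omitted. A sketch, assuming the import path below:

```ts
import type { JobCreationBaseConfig } from "@daocloud-proto/baize/management-api/job/v1alpha1/common.pb"

// With the oneof member set; the image, command, and value are placeholders.
const withToleration: JobCreationBaseConfig = {
  image: "registry.example.com/train:latest",
  command: ["python", "train.py"],
  tolerationSeconds: "300",
}

// Equally valid: omit tolerationSeconds entirely.
const withoutToleration: JobCreationBaseConfig = {
  image: "registry.example.com/train:latest",
}
```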
package/management-api/job/v1alpha1/job.pb.ts
CHANGED

@@ -39,10 +39,12 @@ export enum JobActionRequestAction {
   priorityClass?: string
   runningDuration?: number
   totalResources?: BaizeCommonK8s.Resources
+  affinity?: BaizeCommonK8s.Affinity
 }
 
 export type Job = BaseJob
   & OneOf<{ pytorch: BaizeManagement_apiJobV1alpha1Pytorch.PyTorchJob; tensorflow: BaizeManagement_apiJobV1alpha1Tfjob.TFJob; paddle: BaizeManagement_apiJobV1alpha1Paddle.PaddleJob }>
+  & OneOf<{ tolerationSeconds: string }>
 
 export type ListJobsRequest = {
   workspace?: number
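Because `Job` intersects two `OneOf` groups, at most one framework payload (`pytorch`, `tensorflow`, `paddle`) is populated and the unset members are typed as `undefined`, which allows simple runtime narrowing. A sketch (the import path is an assumption about how the package exposes this file):

```ts
import type { Job } from "@daocloud-proto/baize/management-api/job/v1alpha1/job.pb"

function describeJob(job: Job): string {
  // Exactly one framework member should be present on a well-formed Job.
  const framework = job.pytorch ? "pytorch" : job.tensorflow ? "tensorflow" : job.paddle ? "paddle" : "unknown"
  return `${framework} job, tolerationSeconds=${job.tolerationSeconds ?? "unset"}`
}
```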
package/management-api/pod/v1alpha1/pod.pb.ts
CHANGED

@@ -49,7 +49,7 @@ export type PodInstanceListResponse = {
   page?: BaizeCommonCommon.Pagination
 }
 
-export class
+export class PodsManagement {
   static GetPodInstanceList(req: PodRequest, initReq?: fm.InitReq): Promise<PodInstanceListResponse> {
     return fm.fetchReq<PodRequest, PodInstanceListResponse>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/resources/${req["name"]}/instances?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace", "name"])}`, {...initReq, method: "GET"})
   }
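The previously truncated class declaration now reads `PodsManagement`, so the instance-list endpoint can be called through the generated client. A hedged sketch (field names are read off the URL template; `pathPrefix` is the base-URL option grpc-gateway-ts clients typically accept, and all values are placeholders):

```ts
import { PodsManagement } from "@daocloud-proto/baize/management-api/pod/v1alpha1/pod.pb"

const instances = await PodsManagement.GetPodInstanceList(
  { workspace: 1, cluster: "cluster-a", namespace: "default", name: "notebook-0" },
  { pathPrefix: "https://api.example.com" },
)
```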
package/management-api/queue/v1alpha1/queue.pb.ts
CHANGED

@@ -60,6 +60,10 @@ export type ListQueueResponse = {
   page?: BaizeCommonCommon.Pagination
 }
 
+export type QueueJSON = {
+  data?: string
+}
+
 export type CreateQueueRequest = {
   type?: QueueType
   cluster?: string
@@ -139,6 +143,9 @@ export class QueueManagement {
   static GetQueue(req: GetQueueRequest, initReq?: fm.InitReq): Promise<Queue> {
     return fm.fetchReq<GetQueueRequest, Queue>(`/apis/baize.io/v1alpha1/clusters/${req["cluster"]}/queues/${req["name"]}?${fm.renderURLSearchParams(req, ["cluster", "name"])}`, {...initReq, method: "GET"})
   }
+  static GetQueueByJSON(req: GetQueueRequest, initReq?: fm.InitReq): Promise<QueueJSON> {
+    return fm.fetchReq<GetQueueRequest, QueueJSON>(`/apis/baize.io/v1alpha1/clusters/${req["cluster"]}/queues/${req["name"]}/json?${fm.renderURLSearchParams(req, ["cluster", "name"])}`, {...initReq, method: "GET"})
+  }
   static DeleteQueue(req: DeleteQueueRequest, initReq?: fm.InitReq): Promise<Queue> {
     return fm.fetchReq<DeleteQueueRequest, Queue>(`/apis/baize.io/v1alpha1/clusters/${req["cluster"]}/queues/${req["name"]}`, {...initReq, method: "DELETE", body: JSON.stringify(req, fm.replacer)})
   }
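The new `GetQueueByJSON` endpoint appears to return the queue as an opaque JSON string in `QueueJSON.data` rather than as the structured `Queue` message, so callers parse it themselves. A sketch under the same import and `pathPrefix` assumptions as above:

```ts
import { QueueManagement } from "@daocloud-proto/baize/management-api/queue/v1alpha1/queue.pb"

const res = await QueueManagement.GetQueueByJSON(
  { cluster: "cluster-a", name: "default" },
  { pathPrefix: "https://api.example.com" },
)
// data is an untyped string; its exact shape is not defined by these types.
const rawQueue = res.data ? JSON.parse(res.data) : undefined
```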
package/management-api/serving/triton/model_config.pb.ts
ADDED

@@ -0,0 +1,368 @@
+/* eslint-disable */
+// @ts-nocheck
+/*
+* This file is a generated Typescript file for GRPC Gateway, DO NOT MODIFY
+*/
+
+type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
+type OneOf<T> =
+  | { [k in keyof T]?: undefined }
+  | (
+    keyof T extends infer K ?
+      (K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
+        : never)
+    : never);
+
+export enum DataType {
+  TYPE_INVALID = "TYPE_INVALID",
+  TYPE_BOOL = "TYPE_BOOL",
+  TYPE_UINT8 = "TYPE_UINT8",
+  TYPE_UINT16 = "TYPE_UINT16",
+  TYPE_UINT32 = "TYPE_UINT32",
+  TYPE_UINT64 = "TYPE_UINT64",
+  TYPE_INT8 = "TYPE_INT8",
+  TYPE_INT16 = "TYPE_INT16",
+  TYPE_INT32 = "TYPE_INT32",
+  TYPE_INT64 = "TYPE_INT64",
+  TYPE_FP16 = "TYPE_FP16",
+  TYPE_FP32 = "TYPE_FP32",
+  TYPE_FP64 = "TYPE_FP64",
+  TYPE_STRING = "TYPE_STRING",
+  TYPE_BF16 = "TYPE_BF16",
+}
+
+export enum ModelInstanceGroupKind {
+  KIND_AUTO = "KIND_AUTO",
+  KIND_GPU = "KIND_GPU",
+  KIND_CPU = "KIND_CPU",
+  KIND_MODEL = "KIND_MODEL",
+}
+
+export enum ModelInstanceGroupSecondaryDeviceSecondaryDeviceKind {
+  KIND_NVDLA = "KIND_NVDLA",
+}
+
+export enum ModelInputFormat {
+  FORMAT_NONE = "FORMAT_NONE",
+  FORMAT_NHWC = "FORMAT_NHWC",
+  FORMAT_NCHW = "FORMAT_NCHW",
+}
+
+export enum BatchInputKind {
+  BATCH_ELEMENT_COUNT = "BATCH_ELEMENT_COUNT",
+  BATCH_ACCUMULATED_ELEMENT_COUNT = "BATCH_ACCUMULATED_ELEMENT_COUNT",
+  BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO",
+  BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE",
+  BATCH_ITEM_SHAPE = "BATCH_ITEM_SHAPE",
+  BATCH_ITEM_SHAPE_FLATTEN = "BATCH_ITEM_SHAPE_FLATTEN",
+}
+
+export enum BatchOutputKind {
+  BATCH_SCATTER_WITH_INPUT_SHAPE = "BATCH_SCATTER_WITH_INPUT_SHAPE",
+}
+
+export enum ModelOptimizationPolicyModelPriority {
+  PRIORITY_DEFAULT = "PRIORITY_DEFAULT",
+  PRIORITY_MAX = "PRIORITY_MAX",
+  PRIORITY_MIN = "PRIORITY_MIN",
+}
+
+export enum ModelQueuePolicyTimeoutAction {
+  REJECT = "REJECT",
+  DELAY = "DELAY",
+}
+
+export enum ModelSequenceBatchingControlKind {
+  CONTROL_SEQUENCE_START = "CONTROL_SEQUENCE_START",
+  CONTROL_SEQUENCE_READY = "CONTROL_SEQUENCE_READY",
+  CONTROL_SEQUENCE_END = "CONTROL_SEQUENCE_END",
+  CONTROL_SEQUENCE_CORRID = "CONTROL_SEQUENCE_CORRID",
+}
+
+export type ModelRateLimiterResource = {
+  name?: string
+  global?: boolean
+  count?: number
+}
+
+export type ModelRateLimiter = {
+  resources?: ModelRateLimiterResource[]
+  priority?: number
+}
+
+export type ModelInstanceGroupSecondaryDevice = {
+  kind?: ModelInstanceGroupSecondaryDeviceSecondaryDeviceKind
+  deviceId?: string
+}
+
+export type ModelInstanceGroup = {
+  name?: string
+  kind?: ModelInstanceGroupKind
+  count?: number
+  rateLimiter?: ModelRateLimiter
+  gpus?: number[]
+  secondaryDevices?: ModelInstanceGroupSecondaryDevice[]
+  profile?: string[]
+  passive?: boolean
+  hostPolicy?: string
+}
+
+export type ModelTensorReshape = {
+  shape?: string[]
+}
+
+export type ModelInput = {
+  name?: string
+  dataType?: DataType
+  format?: ModelInputFormat
+  dims?: string[]
+  reshape?: ModelTensorReshape
+  isShapeTensor?: boolean
+  allowRaggedBatch?: boolean
+  optional?: boolean
+}
+
+export type ModelOutput = {
+  name?: string
+  dataType?: DataType
+  dims?: string[]
+  reshape?: ModelTensorReshape
+  labelFilename?: string
+  isShapeTensor?: boolean
+}
+
+export type BatchInput = {
+  kind?: BatchInputKind
+  targetName?: string[]
+  dataType?: DataType
+  sourceInput?: string[]
+}
+
+export type BatchOutput = {
+  targetName?: string[]
+  kind?: BatchOutputKind
+  sourceInput?: string[]
+}
+
+export type ModelVersionPolicyLatest = {
+  numVersions?: number
+}
+
+export type ModelVersionPolicyAll = {
+}
+
+export type ModelVersionPolicySpecific = {
+  versions?: string[]
+}
+
+
+/* baize modified */ export type BaseModelVersionPolicy = {
+}
+
+export type ModelVersionPolicy = BaseModelVersionPolicy
+  & OneOf<{ latest: ModelVersionPolicyLatest; all: ModelVersionPolicyAll; specific: ModelVersionPolicySpecific }>
+
+export type ModelOptimizationPolicyGraph = {
+  level?: number
+}
+
+export type ModelOptimizationPolicyCudaGraphSpecShape = {
+  dim?: string[]
+}
+
+export type ModelOptimizationPolicyCudaGraphSpecLowerBound = {
+  batchSize?: number
+  input?: {[key: string]: ModelOptimizationPolicyCudaGraphSpecShape}
+}
+
+export type ModelOptimizationPolicyCudaGraphSpec = {
+  batchSize?: number
+  input?: {[key: string]: ModelOptimizationPolicyCudaGraphSpecShape}
+  graphLowerBound?: ModelOptimizationPolicyCudaGraphSpecLowerBound
+}
+
+export type ModelOptimizationPolicyCuda = {
+  graphs?: boolean
+  busyWaitEvents?: boolean
+  graphSpec?: ModelOptimizationPolicyCudaGraphSpec[]
+  outputCopyStream?: boolean
+}
+
+export type ModelOptimizationPolicyExecutionAcceleratorsAccelerator = {
+  name?: string
+  parameters?: {[key: string]: string}
+}
+
+export type ModelOptimizationPolicyExecutionAccelerators = {
+  gpuExecutionAccelerator?: ModelOptimizationPolicyExecutionAcceleratorsAccelerator[]
+  cpuExecutionAccelerator?: ModelOptimizationPolicyExecutionAcceleratorsAccelerator[]
+}
+
+export type ModelOptimizationPolicyPinnedMemoryBuffer = {
+  enable?: boolean
+}
+
+export type ModelOptimizationPolicy = {
+  graph?: ModelOptimizationPolicyGraph
+  priority?: ModelOptimizationPolicyModelPriority
+  cuda?: ModelOptimizationPolicyCuda
+  executionAccelerators?: ModelOptimizationPolicyExecutionAccelerators
+  inputPinnedMemory?: ModelOptimizationPolicyPinnedMemoryBuffer
+  outputPinnedMemory?: ModelOptimizationPolicyPinnedMemoryBuffer
+  gatherKernelBufferThreshold?: number
+  eagerBatching?: boolean
+}
+
+export type ModelQueuePolicy = {
+  timeoutAction?: ModelQueuePolicyTimeoutAction
+  defaultTimeoutMicroseconds?: string
+  allowTimeoutOverride?: boolean
+  maxQueueSize?: number
+}
+
+export type ModelDynamicBatching = {
+  preferredBatchSize?: number[]
+  maxQueueDelayMicroseconds?: string
+  preserveOrdering?: boolean
+  priorityLevels?: string
+  defaultPriorityLevel?: string
+  defaultQueuePolicy?: ModelQueuePolicy
+  priorityQueuePolicy?: {[key: string]: ModelQueuePolicy}
+}
+
+export type ModelSequenceBatchingControl = {
+  kind?: ModelSequenceBatchingControlKind
+  int32FalseTrue?: number[]
+  fp32FalseTrue?: number[]
+  boolFalseTrue?: boolean[]
+  dataType?: DataType
+}
+
+export type ModelSequenceBatchingControlInput = {
+  name?: string
+  control?: ModelSequenceBatchingControl[]
+}
+
+
+/* baize modified */ export type BaseModelSequenceBatchingInitialState = {
+  dataType?: DataType
+  dims?: string[]
+  name?: string
+}
+
+export type ModelSequenceBatchingInitialState = BaseModelSequenceBatchingInitialState
+  & OneOf<{ zeroData: boolean; dataFile: string }>
+
+export type ModelSequenceBatchingState = {
+  inputName?: string
+  outputName?: string
+  dataType?: DataType
+  dims?: string[]
+  initialState?: ModelSequenceBatchingInitialState[]
+  useSameBufferForInputOutput?: boolean
+  useGrowableMemory?: boolean
+}
+
+export type ModelSequenceBatchingStrategyDirect = {
+  maxQueueDelayMicroseconds?: string
+  minimumSlotUtilization?: number
+}
+
+export type ModelSequenceBatchingStrategyOldest = {
+  maxCandidateSequences?: number
+  preferredBatchSize?: number[]
+  maxQueueDelayMicroseconds?: string
+  preserveOrdering?: boolean
+}
+
+
+/* baize modified */ export type BaseModelSequenceBatching = {
+  maxSequenceIdleMicroseconds?: string
+  controlInput?: ModelSequenceBatchingControlInput[]
+  state?: ModelSequenceBatchingState[]
+  iterativeSequence?: boolean
+}
+
+export type ModelSequenceBatching = BaseModelSequenceBatching
+  & OneOf<{ direct: ModelSequenceBatchingStrategyDirect; oldest: ModelSequenceBatchingStrategyOldest }>
+
+export type ModelEnsemblingStep = {
+  modelName?: string
+  modelVersion?: string
+  inputMap?: {[key: string]: string}
+  outputMap?: {[key: string]: string}
+  modelNamespace?: string
+}
+
+export type ModelEnsembling = {
+  step?: ModelEnsemblingStep[]
+}
+
+export type ModelParameter = {
+  stringValue?: string
+}
+
+
+/* baize modified */ export type BaseModelWarmupInput = {
+  dataType?: DataType
+  dims?: string[]
+}
+
+export type ModelWarmupInput = BaseModelWarmupInput
+  & OneOf<{ zeroData: boolean; randomData: boolean; inputDataFile: string }>
+
+export type ModelWarmup = {
+  name?: string
+  batchSize?: number
+  inputs?: {[key: string]: ModelWarmupInput}
+  count?: number
+}
+
+export type ModelOperations = {
+  opLibraryFilename?: string[]
+}
+
+export type ModelTransactionPolicy = {
+  decoupled?: boolean
+}
+
+export type ModelRepositoryAgentsAgent = {
+  name?: string
+  parameters?: {[key: string]: string}
+}
+
+export type ModelRepositoryAgents = {
+  agents?: ModelRepositoryAgentsAgent[]
+}
+
+export type ModelResponseCache = {
+  enable?: boolean
+}
+
+
+/* baize modified */ export type BaseModelConfig = {
+  name?: string
+  platform?: string
+  backend?: string
+  runtime?: string
+  versionPolicy?: ModelVersionPolicy
+  maxBatchSize?: number
+  input?: ModelInput[]
+  output?: ModelOutput[]
+  batchInput?: BatchInput[]
+  batchOutput?: BatchOutput[]
+  optimization?: ModelOptimizationPolicy
+  instanceGroup?: ModelInstanceGroup[]
+  defaultModelFilename?: string
+  ccModelFilenames?: {[key: string]: string}
+  metricTags?: {[key: string]: string}
+  parameters?: {[key: string]: ModelParameter}
+  modelWarmup?: ModelWarmup[]
+  modelOperations?: ModelOperations
+  modelTransactionPolicy?: ModelTransactionPolicy
+  modelRepositoryAgents?: ModelRepositoryAgents
+  responseCache?: ModelResponseCache
+}
+
+export type ModelConfig = BaseModelConfig
+  & OneOf<{ dynamicBatching: ModelDynamicBatching; sequenceBatching: ModelSequenceBatching; ensembleScheduling: ModelEnsembling }>
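The new `model_config.pb.ts` mirrors Triton Inference Server's `model_config.proto`, with the scheduling choice expressed as a `OneOf` so at most one of `dynamicBatching`, `sequenceBatching`, or `ensembleScheduling` can be set. A minimal sketch of a config value (all concrete values are illustrative; int64-backed fields such as `dims` and `maxQueueDelayMicroseconds` are strings):

```ts
import { DataType, type ModelConfig } from "@daocloud-proto/baize/management-api/serving/triton/model_config.pb"

const resnetConfig: ModelConfig = {
  name: "resnet50",
  backend: "onnxruntime",
  maxBatchSize: 8,
  input: [{ name: "input__0", dataType: DataType.TYPE_FP32, dims: ["3", "224", "224"] }],
  output: [{ name: "output__0", dataType: DataType.TYPE_FP32, dims: ["1000"] }],
  // OneOf: choosing dynamicBatching leaves sequenceBatching/ensembleScheduling typed as undefined.
  dynamicBatching: { preferredBatchSize: [4, 8], maxQueueDelayMicroseconds: "100" },
}
```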
package/management-api/serving/v1alpha1/serving.pb.ts
ADDED

@@ -0,0 +1,212 @@
+/* eslint-disable */
+// @ts-nocheck
+/*
+* This file is a generated Typescript file for GRPC Gateway, DO NOT MODIFY
+*/
+
+import * as BaizeCommonCommon from "../../../common/common.pb"
+import * as BaizeCommonK8s from "../../../common/k8s.pb"
+import * as fm from "../../../fetch.pb"
+import * as GoogleProtobufTimestamp from "../../../google/protobuf/timestamp.pb"
+import * as BaizeManagement_apiServingTritonModel_config from "../triton/model_config.pb"
+
+type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
+type OneOf<T> =
+  | { [k in keyof T]?: undefined }
+  | (
+    keyof T extends infer K ?
+      (K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
+        : never)
+    : never);
+
+export enum ServiceType {
+  SERVICE_TYPE_UNSPECIFIED = "SERVICE_TYPE_UNSPECIFIED",
+  NODE_PORT = "NODE_PORT",
+  LOAD_BALANCER = "LOAD_BALANCER",
+  CLUSTER_IP = "CLUSTER_IP",
+}
+
+export enum FrameworkType {
+  FRAMEWORK_TYPE_UNSPECIFIED = "FRAMEWORK_TYPE_UNSPECIFIED",
+  FRAMEWORK_TYPE_TRITON = "FRAMEWORK_TYPE_TRITON",
+}
+
+export enum FrameworkTritonBackend {
+  TRITON_BACKEND_UNSPECIFIED = "TRITON_BACKEND_UNSPECIFIED",
+  TRITON_BACKEND_PYTORCH = "TRITON_BACKEND_PYTORCH",
+  TRITON_BACKEND_TENSORFLOW = "TRITON_BACKEND_TENSORFLOW",
+  TRITON_BACKEND_VLLM = "TRITON_BACKEND_VLLM",
+  TRITON_BACKEND_ONNX = "TRITON_BACKEND_ONNX",
+}
+
+export enum InferenceServingStatusPhase {
+  PHASE_UNSPECIFIED = "PHASE_UNSPECIFIED",
+  PENDING = "PENDING",
+  UPDATING_OR_CREATING = "UPDATING_OR_CREATING",
+  RUNNING = "RUNNING",
+  FAILED = "FAILED",
+  DELETING = "DELETING",
+  STOPPED = "STOPPED",
+}
+
+export enum ServingAuthAuthType {
+  AUTH_TYPE_UNSPECIFIED = "AUTH_TYPE_UNSPECIFIED",
+  TRITON_RESTRICTED_KEY = "TRITON_RESTRICTED_KEY",
+}
+
+export type FrameworkTriton = {
+  backend?: FrameworkTritonBackend
+}
+
+
+/* baize modified */ export type BaseFramework = {
+  type?: FrameworkType
+}
+
+export type Framework = BaseFramework
+  & OneOf<{ triton: FrameworkTriton }>
+
+export type Model = {
+  name?: string
+  version?: string
+  modelPath?: string
+}
+
+export type ServingConfigVLLM = {
+  trustRemoteCode?: boolean
+  tensorParallelSize?: number
+}
+
+export type ServingConfigTritonModelConfig = {
+  inputs?: BaizeManagement_apiServingTritonModel_config.ModelInput[]
+  outputs?: BaizeManagement_apiServingTritonModel_config.ModelOutput[]
+  maxBatchSize?: number
+  customConfig?: string
+}
+
+
+/* baize modified */ export type BaseServingConfigTritonServingConfig = {
+  customModelConfig?: {[key: string]: string}
+}
+
+export type ServingConfigTritonServingConfig = BaseServingConfigTritonServingConfig
+  & OneOf<{ config: ServingConfigTritonModelConfig; vllm: ServingConfigVLLM }>
+
+
+/* baize modified */ export type BaseServingConfig = {
+  name?: string
+  version?: string
+  modelPath?: string
+}
+
+export type ServingConfig = BaseServingConfig
+  & OneOf<{ triton: ServingConfigTritonServingConfig }>
+
+export type ServiceConfig = {
+  serviceType?: ServiceType
+}
+
+export type InferenceServing = {
+  name?: string
+  cluster?: string
+  namespace?: string
+  models?: ServingConfig[]
+  serviceConfig?: ServiceConfig
+  replicas?: number
+  labels?: {[key: string]: string}
+  annotations?: {[key: string]: string}
+  framework?: Framework
+  podConfig?: BaizeCommonK8s.PodConfig
+  status?: InferenceServingStatus
+  lastUpdated?: GoogleProtobufTimestamp.Timestamp
+}
+
+export type InferenceServingStatusModelStatus = {
+  name?: string
+  accessPath?: string
+}
+
+export type InferenceServingStatus = {
+  phase?: InferenceServingStatusPhase
+  availableReplicas?: number
+  models?: InferenceServingStatusModelStatus[]
+  accessBaseUrl?: string
+}
+
+export type ServingAuthTritonRestrictedKeyValue = {
+  key?: string
+  value?: string
+}
+
+
+/* baize modified */ export type BaseServingAuth = {
+  authType?: ServingAuthAuthType
+}
+
+export type ServingAuth = BaseServingAuth
+  & OneOf<{ tritonRestrictedKeyValue: ServingAuthTritonRestrictedKeyValue }>
+
+export type CreateInferenceServingRequest = {
+  workspace?: number
+  name?: string
+  cluster?: string
+  namespace?: string
+  models?: ServingConfig[]
+  serviceConfig?: ServiceConfig
+  replicas?: number
+  labels?: {[key: string]: string}
+  annotations?: {[key: string]: string}
+  podConfig?: BaizeCommonK8s.PodConfig
+  framework?: Framework
+  auth?: ServingAuth
+}
+
+export type UpdateInferenceServingRequest = {
+  workspace?: number
+  name?: string
+  cluster?: string
+  namespace?: string
+  models?: ServingConfig[]
+  replicas?: number
+  labels?: {[key: string]: string}
+  annotations?: {[key: string]: string}
+  podConfig?: BaizeCommonK8s.PodConfig
+  auth?: ServingAuth
+}
+
+export type SingleInferenceServingRequest = {
+  name?: string
+  cluster?: string
+  namespace?: string
+  workspace?: number
+}
+
+export type ListInferenceServingsRequest = {
+  cluster?: string
+  namespace?: string
+  workspace?: number
+  page?: BaizeCommonCommon.Pagination
+}
+
+export type ListInferenceServingsResponse = {
+  items?: InferenceServing[]
+  page?: BaizeCommonCommon.Pagination
+}
+
+export class InferenceServingManagement {
+  static CreateInferenceServing(req: CreateInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
+    return fm.fetchReq<CreateInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving`, {...initReq, method: "POST", body: JSON.stringify(req, fm.replacer)})
+  }
+  static GetInferenceServing(req: SingleInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
+    return fm.fetchReq<SingleInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving/${req["name"]}?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace", "name"])}`, {...initReq, method: "GET"})
+  }
+  static ListInferenceServings(req: ListInferenceServingsRequest, initReq?: fm.InitReq): Promise<ListInferenceServingsResponse> {
+    return fm.fetchReq<ListInferenceServingsRequest, ListInferenceServingsResponse>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace"])}`, {...initReq, method: "GET"})
+  }
+  static UpdateInferenceServing(req: UpdateInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
+    return fm.fetchReq<UpdateInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving/${req["name"]}`, {...initReq, method: "PUT", body: JSON.stringify(req, fm.replacer)})
+  }
+  static DeleteInferenceServing(req: SingleInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
+    return fm.fetchReq<SingleInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving/${req["name"]}`, {...initReq, method: "DELETE", body: JSON.stringify(req, fm.replacer)})
+  }
+}
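The new `InferenceServingManagement` client covers the full CRUD surface for inference servings. A sketch of creating a Triton/vLLM serving (the import path, `pathPrefix`, and all concrete values are placeholders, not prescribed by the package):

```ts
import {
  FrameworkType,
  FrameworkTritonBackend,
  InferenceServingManagement,
  ServiceType,
} from "@daocloud-proto/baize/management-api/serving/v1alpha1/serving.pb"

const serving = await InferenceServingManagement.CreateInferenceServing(
  {
    workspace: 1,
    cluster: "cluster-a",
    namespace: "default",
    name: "qwen-demo",
    replicas: 1,
    framework: {
      type: FrameworkType.FRAMEWORK_TYPE_TRITON,
      triton: { backend: FrameworkTritonBackend.TRITON_BACKEND_VLLM },
    },
    serviceConfig: { serviceType: ServiceType.CLUSTER_IP },
    // ServingConfig is BaseServingConfig & OneOf<{ triton }>; the nested OneOf picks vllm over config.
    models: [
      {
        name: "qwen",
        modelPath: "models/qwen",
        triton: { vllm: { tensorParallelSize: 1, trustRemoteCode: true } },
      },
    ],
  },
  { pathPrefix: "https://api.example.com" },
)
console.log(serving.status?.phase)
```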