@daocloud-proto/baize 0.103.3 → 0.104.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/common/k8s.pb.ts CHANGED
@@ -25,4 +25,78 @@ export type KubeVolume = {
25
25
  name?: string
26
26
  mountPath?: string
27
27
  readOnly?: boolean
28
+ }
29
+
30
+ export type PodConfig = {
31
+ kubeEnvs?: KubeEnv[]
32
+ kubeVolumes?: KubeVolume[]
33
+ resources?: Resources
34
+ affinity?: Affinity
35
+ schedulerName?: string
36
+ priorityClass?: string
37
+ queue?: string
38
+ }
39
+
40
+ export type Affinity = {
41
+ nodeAffinity?: NodeAffinity
42
+ podAffinity?: PodAffinity
43
+ podAntiAffinity?: PodAntiAffinity
44
+ }
45
+
46
+ export type PodAntiAffinity = {
47
+ requiredDuringSchedulingIgnoredDuringExecution?: PodAffinityTerm[]
48
+ preferredDuringSchedulingIgnoredDuringExecution?: WeightedPodAffinityTerm[]
49
+ }
50
+
51
+ export type PodAffinityTerm = {
52
+ labelSelector?: LabelSelector
53
+ namespaces?: string[]
54
+ topologyKey?: string
55
+ namespaceSelector?: LabelSelector
56
+ }
57
+
58
+ export type LabelSelector = {
59
+ matchLabels?: {[key: string]: string}
60
+ matchExpressions?: LabelSelectorRequirement[]
61
+ }
62
+
63
+ export type WeightedPodAffinityTerm = {
64
+ weight?: number
65
+ podAffinityTerm?: PodAffinityTerm
66
+ }
67
+
68
+ export type PodAffinity = {
69
+ requiredDuringSchedulingIgnoredDuringExecution?: PodAffinityTerm[]
70
+ preferredDuringSchedulingIgnoredDuringExecution?: WeightedPodAffinityTerm[]
71
+ }
72
+
73
+ export type NodeAffinity = {
74
+ requiredDuringSchedulingIgnoredDuringExecution?: NodeSelector
75
+ preferredDuringSchedulingIgnoredDuringExecution?: PreferredSchedulingTerm[]
76
+ }
77
+
78
+ export type NodeSelector = {
79
+ nodeSelectorTerms?: NodeSelectorTerm[]
80
+ }
81
+
82
+ export type PreferredSchedulingTerm = {
83
+ weight?: number
84
+ preference?: NodeSelectorTerm
85
+ }
86
+
87
+ export type NodeSelectorTerm = {
88
+ matchExpressions?: NodeSelectorRequirement[]
89
+ matchFields?: NodeSelectorRequirement[]
90
+ }
91
+
92
+ export type NodeSelectorRequirement = {
93
+ key?: string
94
+ operator?: string
95
+ values?: string[]
96
+ }
97
+
98
+ export type LabelSelectorRequirement = {
99
+ key?: string
100
+ operator?: string
101
+ values?: string[]
28
102
  }
@@ -6,6 +6,15 @@
6
6
 
7
7
  import * as BaizeCommonK8s from "../../../common/k8s.pb"
8
8
 
9
+ type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
10
+ type OneOf<T> =
11
+ | { [k in keyof T]?: undefined }
12
+ | (
13
+ keyof T extends infer K ?
14
+ (K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
15
+ : never)
16
+ : never);
17
+
9
18
  export enum QueueType {
10
19
  QUEUE_TYPE_UNSPECIFIED = "QUEUE_TYPE_UNSPECIFIED",
11
20
  KUEUE = "KUEUE",
@@ -32,7 +41,8 @@ export type CommonConfig = {
32
41
  description?: string
33
42
  }
34
43
 
35
- export type JobCreationBaseConfig = {
44
+
45
+ /* baize modified */ export type BaseJobCreationBaseConfig = {
36
46
  image?: string
37
47
  imagePullSecret?: string
38
48
  command?: string[]
@@ -41,8 +51,12 @@ export type JobCreationBaseConfig = {
41
51
  annotations?: {[key: string]: string}
42
52
  kubeEnvs?: BaizeCommonK8s.KubeEnv[]
43
53
  kubeVolumes?: BaizeCommonK8s.KubeVolume[]
54
+ affinity?: BaizeCommonK8s.Affinity
44
55
  }
45
56
 
57
+ export type JobCreationBaseConfig = BaseJobCreationBaseConfig
58
+ & OneOf<{ tolerationSeconds: string }>
59
+
46
60
  export type JobRoleDifferenceConfig = {
47
61
  replicas?: number
48
62
  resources?: BaizeCommonK8s.Resources
@@ -39,10 +39,12 @@ export enum JobActionRequestAction {
39
39
  priorityClass?: string
40
40
  runningDuration?: number
41
41
  totalResources?: BaizeCommonK8s.Resources
42
+ affinity?: BaizeCommonK8s.Affinity
42
43
  }
43
44
 
44
45
  export type Job = BaseJob
45
46
  & OneOf<{ pytorch: BaizeManagement_apiJobV1alpha1Pytorch.PyTorchJob; tensorflow: BaizeManagement_apiJobV1alpha1Tfjob.TFJob; paddle: BaizeManagement_apiJobV1alpha1Paddle.PaddleJob }>
47
+ & OneOf<{ tolerationSeconds: string }>
46
48
 
47
49
  export type ListJobsRequest = {
48
50
  workspace?: number
@@ -49,7 +49,7 @@ export type PodInstanceListResponse = {
49
49
  page?: BaizeCommonCommon.Pagination
50
50
  }
51
51
 
52
- export class podsManagement {
52
+ export class PodsManagement {
53
53
  static GetPodInstanceList(req: PodRequest, initReq?: fm.InitReq): Promise<PodInstanceListResponse> {
54
54
  return fm.fetchReq<PodRequest, PodInstanceListResponse>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/resources/${req["name"]}/instances?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace", "name"])}`, {...initReq, method: "GET"})
55
55
  }
@@ -60,6 +60,10 @@ export type ListQueueResponse = {
60
60
  page?: BaizeCommonCommon.Pagination
61
61
  }
62
62
 
63
+ export type QueueJSON = {
64
+ data?: string
65
+ }
66
+
63
67
  export type CreateQueueRequest = {
64
68
  type?: QueueType
65
69
  cluster?: string
@@ -139,6 +143,9 @@ export class QueueManagement {
139
143
  static GetQueue(req: GetQueueRequest, initReq?: fm.InitReq): Promise<Queue> {
140
144
  return fm.fetchReq<GetQueueRequest, Queue>(`/apis/baize.io/v1alpha1/clusters/${req["cluster"]}/queues/${req["name"]}?${fm.renderURLSearchParams(req, ["cluster", "name"])}`, {...initReq, method: "GET"})
141
145
  }
146
+ static GetQueueByJSON(req: GetQueueRequest, initReq?: fm.InitReq): Promise<QueueJSON> {
147
+ return fm.fetchReq<GetQueueRequest, QueueJSON>(`/apis/baize.io/v1alpha1/clusters/${req["cluster"]}/queues/${req["name"]}/json?${fm.renderURLSearchParams(req, ["cluster", "name"])}`, {...initReq, method: "GET"})
148
+ }
142
149
  static DeleteQueue(req: DeleteQueueRequest, initReq?: fm.InitReq): Promise<Queue> {
143
150
  return fm.fetchReq<DeleteQueueRequest, Queue>(`/apis/baize.io/v1alpha1/clusters/${req["cluster"]}/queues/${req["name"]}`, {...initReq, method: "DELETE", body: JSON.stringify(req, fm.replacer)})
144
151
  }
@@ -0,0 +1,368 @@
1
+ /* eslint-disable */
2
+ // @ts-nocheck
3
+ /*
4
+ * This file is a generated Typescript file for GRPC Gateway, DO NOT MODIFY
5
+ */
6
+
7
+ type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
8
+ type OneOf<T> =
9
+ | { [k in keyof T]?: undefined }
10
+ | (
11
+ keyof T extends infer K ?
12
+ (K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
13
+ : never)
14
+ : never);
15
+
16
+ export enum DataType {
17
+ TYPE_INVALID = "TYPE_INVALID",
18
+ TYPE_BOOL = "TYPE_BOOL",
19
+ TYPE_UINT8 = "TYPE_UINT8",
20
+ TYPE_UINT16 = "TYPE_UINT16",
21
+ TYPE_UINT32 = "TYPE_UINT32",
22
+ TYPE_UINT64 = "TYPE_UINT64",
23
+ TYPE_INT8 = "TYPE_INT8",
24
+ TYPE_INT16 = "TYPE_INT16",
25
+ TYPE_INT32 = "TYPE_INT32",
26
+ TYPE_INT64 = "TYPE_INT64",
27
+ TYPE_FP16 = "TYPE_FP16",
28
+ TYPE_FP32 = "TYPE_FP32",
29
+ TYPE_FP64 = "TYPE_FP64",
30
+ TYPE_STRING = "TYPE_STRING",
31
+ TYPE_BF16 = "TYPE_BF16",
32
+ }
33
+
34
+ export enum ModelInstanceGroupKind {
35
+ KIND_AUTO = "KIND_AUTO",
36
+ KIND_GPU = "KIND_GPU",
37
+ KIND_CPU = "KIND_CPU",
38
+ KIND_MODEL = "KIND_MODEL",
39
+ }
40
+
41
+ export enum ModelInstanceGroupSecondaryDeviceSecondaryDeviceKind {
42
+ KIND_NVDLA = "KIND_NVDLA",
43
+ }
44
+
45
+ export enum ModelInputFormat {
46
+ FORMAT_NONE = "FORMAT_NONE",
47
+ FORMAT_NHWC = "FORMAT_NHWC",
48
+ FORMAT_NCHW = "FORMAT_NCHW",
49
+ }
50
+
51
+ export enum BatchInputKind {
52
+ BATCH_ELEMENT_COUNT = "BATCH_ELEMENT_COUNT",
53
+ BATCH_ACCUMULATED_ELEMENT_COUNT = "BATCH_ACCUMULATED_ELEMENT_COUNT",
54
+ BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO",
55
+ BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE",
56
+ BATCH_ITEM_SHAPE = "BATCH_ITEM_SHAPE",
57
+ BATCH_ITEM_SHAPE_FLATTEN = "BATCH_ITEM_SHAPE_FLATTEN",
58
+ }
59
+
60
+ export enum BatchOutputKind {
61
+ BATCH_SCATTER_WITH_INPUT_SHAPE = "BATCH_SCATTER_WITH_INPUT_SHAPE",
62
+ }
63
+
64
+ export enum ModelOptimizationPolicyModelPriority {
65
+ PRIORITY_DEFAULT = "PRIORITY_DEFAULT",
66
+ PRIORITY_MAX = "PRIORITY_MAX",
67
+ PRIORITY_MIN = "PRIORITY_MIN",
68
+ }
69
+
70
+ export enum ModelQueuePolicyTimeoutAction {
71
+ REJECT = "REJECT",
72
+ DELAY = "DELAY",
73
+ }
74
+
75
+ export enum ModelSequenceBatchingControlKind {
76
+ CONTROL_SEQUENCE_START = "CONTROL_SEQUENCE_START",
77
+ CONTROL_SEQUENCE_READY = "CONTROL_SEQUENCE_READY",
78
+ CONTROL_SEQUENCE_END = "CONTROL_SEQUENCE_END",
79
+ CONTROL_SEQUENCE_CORRID = "CONTROL_SEQUENCE_CORRID",
80
+ }
81
+
82
+ export type ModelRateLimiterResource = {
83
+ name?: string
84
+ global?: boolean
85
+ count?: number
86
+ }
87
+
88
+ export type ModelRateLimiter = {
89
+ resources?: ModelRateLimiterResource[]
90
+ priority?: number
91
+ }
92
+
93
+ export type ModelInstanceGroupSecondaryDevice = {
94
+ kind?: ModelInstanceGroupSecondaryDeviceSecondaryDeviceKind
95
+ deviceId?: string
96
+ }
97
+
98
+ export type ModelInstanceGroup = {
99
+ name?: string
100
+ kind?: ModelInstanceGroupKind
101
+ count?: number
102
+ rateLimiter?: ModelRateLimiter
103
+ gpus?: number[]
104
+ secondaryDevices?: ModelInstanceGroupSecondaryDevice[]
105
+ profile?: string[]
106
+ passive?: boolean
107
+ hostPolicy?: string
108
+ }
109
+
110
+ export type ModelTensorReshape = {
111
+ shape?: string[]
112
+ }
113
+
114
+ export type ModelInput = {
115
+ name?: string
116
+ dataType?: DataType
117
+ format?: ModelInputFormat
118
+ dims?: string[]
119
+ reshape?: ModelTensorReshape
120
+ isShapeTensor?: boolean
121
+ allowRaggedBatch?: boolean
122
+ optional?: boolean
123
+ }
124
+
125
+ export type ModelOutput = {
126
+ name?: string
127
+ dataType?: DataType
128
+ dims?: string[]
129
+ reshape?: ModelTensorReshape
130
+ labelFilename?: string
131
+ isShapeTensor?: boolean
132
+ }
133
+
134
+ export type BatchInput = {
135
+ kind?: BatchInputKind
136
+ targetName?: string[]
137
+ dataType?: DataType
138
+ sourceInput?: string[]
139
+ }
140
+
141
+ export type BatchOutput = {
142
+ targetName?: string[]
143
+ kind?: BatchOutputKind
144
+ sourceInput?: string[]
145
+ }
146
+
147
+ export type ModelVersionPolicyLatest = {
148
+ numVersions?: number
149
+ }
150
+
151
+ export type ModelVersionPolicyAll = {
152
+ }
153
+
154
+ export type ModelVersionPolicySpecific = {
155
+ versions?: string[]
156
+ }
157
+
158
+
159
+ /* baize modified */ export type BaseModelVersionPolicy = {
160
+ }
161
+
162
+ export type ModelVersionPolicy = BaseModelVersionPolicy
163
+ & OneOf<{ latest: ModelVersionPolicyLatest; all: ModelVersionPolicyAll; specific: ModelVersionPolicySpecific }>
164
+
165
+ export type ModelOptimizationPolicyGraph = {
166
+ level?: number
167
+ }
168
+
169
+ export type ModelOptimizationPolicyCudaGraphSpecShape = {
170
+ dim?: string[]
171
+ }
172
+
173
+ export type ModelOptimizationPolicyCudaGraphSpecLowerBound = {
174
+ batchSize?: number
175
+ input?: {[key: string]: ModelOptimizationPolicyCudaGraphSpecShape}
176
+ }
177
+
178
+ export type ModelOptimizationPolicyCudaGraphSpec = {
179
+ batchSize?: number
180
+ input?: {[key: string]: ModelOptimizationPolicyCudaGraphSpecShape}
181
+ graphLowerBound?: ModelOptimizationPolicyCudaGraphSpecLowerBound
182
+ }
183
+
184
+ export type ModelOptimizationPolicyCuda = {
185
+ graphs?: boolean
186
+ busyWaitEvents?: boolean
187
+ graphSpec?: ModelOptimizationPolicyCudaGraphSpec[]
188
+ outputCopyStream?: boolean
189
+ }
190
+
191
+ export type ModelOptimizationPolicyExecutionAcceleratorsAccelerator = {
192
+ name?: string
193
+ parameters?: {[key: string]: string}
194
+ }
195
+
196
+ export type ModelOptimizationPolicyExecutionAccelerators = {
197
+ gpuExecutionAccelerator?: ModelOptimizationPolicyExecutionAcceleratorsAccelerator[]
198
+ cpuExecutionAccelerator?: ModelOptimizationPolicyExecutionAcceleratorsAccelerator[]
199
+ }
200
+
201
+ export type ModelOptimizationPolicyPinnedMemoryBuffer = {
202
+ enable?: boolean
203
+ }
204
+
205
+ export type ModelOptimizationPolicy = {
206
+ graph?: ModelOptimizationPolicyGraph
207
+ priority?: ModelOptimizationPolicyModelPriority
208
+ cuda?: ModelOptimizationPolicyCuda
209
+ executionAccelerators?: ModelOptimizationPolicyExecutionAccelerators
210
+ inputPinnedMemory?: ModelOptimizationPolicyPinnedMemoryBuffer
211
+ outputPinnedMemory?: ModelOptimizationPolicyPinnedMemoryBuffer
212
+ gatherKernelBufferThreshold?: number
213
+ eagerBatching?: boolean
214
+ }
215
+
216
+ export type ModelQueuePolicy = {
217
+ timeoutAction?: ModelQueuePolicyTimeoutAction
218
+ defaultTimeoutMicroseconds?: string
219
+ allowTimeoutOverride?: boolean
220
+ maxQueueSize?: number
221
+ }
222
+
223
+ export type ModelDynamicBatching = {
224
+ preferredBatchSize?: number[]
225
+ maxQueueDelayMicroseconds?: string
226
+ preserveOrdering?: boolean
227
+ priorityLevels?: string
228
+ defaultPriorityLevel?: string
229
+ defaultQueuePolicy?: ModelQueuePolicy
230
+ priorityQueuePolicy?: {[key: string]: ModelQueuePolicy}
231
+ }
232
+
233
+ export type ModelSequenceBatchingControl = {
234
+ kind?: ModelSequenceBatchingControlKind
235
+ int32FalseTrue?: number[]
236
+ fp32FalseTrue?: number[]
237
+ boolFalseTrue?: boolean[]
238
+ dataType?: DataType
239
+ }
240
+
241
+ export type ModelSequenceBatchingControlInput = {
242
+ name?: string
243
+ control?: ModelSequenceBatchingControl[]
244
+ }
245
+
246
+
247
+ /* baize modified */ export type BaseModelSequenceBatchingInitialState = {
248
+ dataType?: DataType
249
+ dims?: string[]
250
+ name?: string
251
+ }
252
+
253
+ export type ModelSequenceBatchingInitialState = BaseModelSequenceBatchingInitialState
254
+ & OneOf<{ zeroData: boolean; dataFile: string }>
255
+
256
+ export type ModelSequenceBatchingState = {
257
+ inputName?: string
258
+ outputName?: string
259
+ dataType?: DataType
260
+ dims?: string[]
261
+ initialState?: ModelSequenceBatchingInitialState[]
262
+ useSameBufferForInputOutput?: boolean
263
+ useGrowableMemory?: boolean
264
+ }
265
+
266
+ export type ModelSequenceBatchingStrategyDirect = {
267
+ maxQueueDelayMicroseconds?: string
268
+ minimumSlotUtilization?: number
269
+ }
270
+
271
+ export type ModelSequenceBatchingStrategyOldest = {
272
+ maxCandidateSequences?: number
273
+ preferredBatchSize?: number[]
274
+ maxQueueDelayMicroseconds?: string
275
+ preserveOrdering?: boolean
276
+ }
277
+
278
+
279
+ /* baize modified */ export type BaseModelSequenceBatching = {
280
+ maxSequenceIdleMicroseconds?: string
281
+ controlInput?: ModelSequenceBatchingControlInput[]
282
+ state?: ModelSequenceBatchingState[]
283
+ iterativeSequence?: boolean
284
+ }
285
+
286
+ export type ModelSequenceBatching = BaseModelSequenceBatching
287
+ & OneOf<{ direct: ModelSequenceBatchingStrategyDirect; oldest: ModelSequenceBatchingStrategyOldest }>
288
+
289
+ export type ModelEnsemblingStep = {
290
+ modelName?: string
291
+ modelVersion?: string
292
+ inputMap?: {[key: string]: string}
293
+ outputMap?: {[key: string]: string}
294
+ modelNamespace?: string
295
+ }
296
+
297
+ export type ModelEnsembling = {
298
+ step?: ModelEnsemblingStep[]
299
+ }
300
+
301
+ export type ModelParameter = {
302
+ stringValue?: string
303
+ }
304
+
305
+
306
+ /* baize modified */ export type BaseModelWarmupInput = {
307
+ dataType?: DataType
308
+ dims?: string[]
309
+ }
310
+
311
+ export type ModelWarmupInput = BaseModelWarmupInput
312
+ & OneOf<{ zeroData: boolean; randomData: boolean; inputDataFile: string }>
313
+
314
+ export type ModelWarmup = {
315
+ name?: string
316
+ batchSize?: number
317
+ inputs?: {[key: string]: ModelWarmupInput}
318
+ count?: number
319
+ }
320
+
321
+ export type ModelOperations = {
322
+ opLibraryFilename?: string[]
323
+ }
324
+
325
+ export type ModelTransactionPolicy = {
326
+ decoupled?: boolean
327
+ }
328
+
329
+ export type ModelRepositoryAgentsAgent = {
330
+ name?: string
331
+ parameters?: {[key: string]: string}
332
+ }
333
+
334
+ export type ModelRepositoryAgents = {
335
+ agents?: ModelRepositoryAgentsAgent[]
336
+ }
337
+
338
+ export type ModelResponseCache = {
339
+ enable?: boolean
340
+ }
341
+
342
+
343
+ /* baize modified */ export type BaseModelConfig = {
344
+ name?: string
345
+ platform?: string
346
+ backend?: string
347
+ runtime?: string
348
+ versionPolicy?: ModelVersionPolicy
349
+ maxBatchSize?: number
350
+ input?: ModelInput[]
351
+ output?: ModelOutput[]
352
+ batchInput?: BatchInput[]
353
+ batchOutput?: BatchOutput[]
354
+ optimization?: ModelOptimizationPolicy
355
+ instanceGroup?: ModelInstanceGroup[]
356
+ defaultModelFilename?: string
357
+ ccModelFilenames?: {[key: string]: string}
358
+ metricTags?: {[key: string]: string}
359
+ parameters?: {[key: string]: ModelParameter}
360
+ modelWarmup?: ModelWarmup[]
361
+ modelOperations?: ModelOperations
362
+ modelTransactionPolicy?: ModelTransactionPolicy
363
+ modelRepositoryAgents?: ModelRepositoryAgents
364
+ responseCache?: ModelResponseCache
365
+ }
366
+
367
+ export type ModelConfig = BaseModelConfig
368
+ & OneOf<{ dynamicBatching: ModelDynamicBatching; sequenceBatching: ModelSequenceBatching; ensembleScheduling: ModelEnsembling }>
@@ -0,0 +1,212 @@
1
+ /* eslint-disable */
2
+ // @ts-nocheck
3
+ /*
4
+ * This file is a generated Typescript file for GRPC Gateway, DO NOT MODIFY
5
+ */
6
+
7
+ import * as BaizeCommonCommon from "../../../common/common.pb"
8
+ import * as BaizeCommonK8s from "../../../common/k8s.pb"
9
+ import * as fm from "../../../fetch.pb"
10
+ import * as GoogleProtobufTimestamp from "../../../google/protobuf/timestamp.pb"
11
+ import * as BaizeManagement_apiServingTritonModel_config from "../triton/model_config.pb"
12
+
13
+ type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
14
+ type OneOf<T> =
15
+ | { [k in keyof T]?: undefined }
16
+ | (
17
+ keyof T extends infer K ?
18
+ (K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
19
+ : never)
20
+ : never);
21
+
22
+ export enum ServiceType {
23
+ SERVICE_TYPE_UNSPECIFIED = "SERVICE_TYPE_UNSPECIFIED",
24
+ NODE_PORT = "NODE_PORT",
25
+ LOAD_BALANCER = "LOAD_BALANCER",
26
+ CLUSTER_IP = "CLUSTER_IP",
27
+ }
28
+
29
+ export enum FrameworkType {
30
+ FRAMEWORK_TYPE_UNSPECIFIED = "FRAMEWORK_TYPE_UNSPECIFIED",
31
+ FRAMEWORK_TYPE_TRITON = "FRAMEWORK_TYPE_TRITON",
32
+ }
33
+
34
+ export enum FrameworkTritonBackend {
35
+ TRITON_BACKEND_UNSPECIFIED = "TRITON_BACKEND_UNSPECIFIED",
36
+ TRITON_BACKEND_PYTORCH = "TRITON_BACKEND_PYTORCH",
37
+ TRITON_BACKEND_TENSORFLOW = "TRITON_BACKEND_TENSORFLOW",
38
+ TRITON_BACKEND_VLLM = "TRITON_BACKEND_VLLM",
39
+ TRITON_BACKEND_ONNX = "TRITON_BACKEND_ONNX",
40
+ }
41
+
42
+ export enum InferenceServingStatusPhase {
43
+ PHASE_UNSPECIFIED = "PHASE_UNSPECIFIED",
44
+ PENDING = "PENDING",
45
+ UPDATING_OR_CREATING = "UPDATING_OR_CREATING",
46
+ RUNNING = "RUNNING",
47
+ FAILED = "FAILED",
48
+ DELETING = "DELETING",
49
+ STOPPED = "STOPPED",
50
+ }
51
+
52
+ export enum ServingAuthAuthType {
53
+ AUTH_TYPE_UNSPECIFIED = "AUTH_TYPE_UNSPECIFIED",
54
+ TRITON_RESTRICTED_KEY = "TRITON_RESTRICTED_KEY",
55
+ }
56
+
57
+ export type FrameworkTriton = {
58
+ backend?: FrameworkTritonBackend
59
+ }
60
+
61
+
62
+ /* baize modified */ export type BaseFramework = {
63
+ type?: FrameworkType
64
+ }
65
+
66
+ export type Framework = BaseFramework
67
+ & OneOf<{ triton: FrameworkTriton }>
68
+
69
+ export type Model = {
70
+ name?: string
71
+ version?: string
72
+ modelPath?: string
73
+ }
74
+
75
+ export type ServingConfigVLLM = {
76
+ trustRemoteCode?: boolean
77
+ tensorParallelSize?: number
78
+ }
79
+
80
+ export type ServingConfigTritonModelConfig = {
81
+ inputs?: BaizeManagement_apiServingTritonModel_config.ModelInput[]
82
+ outputs?: BaizeManagement_apiServingTritonModel_config.ModelOutput[]
83
+ maxBatchSize?: number
84
+ customConfig?: string
85
+ }
86
+
87
+
88
+ /* baize modified */ export type BaseServingConfigTritonServingConfig = {
89
+ customModelConfig?: {[key: string]: string}
90
+ }
91
+
92
+ export type ServingConfigTritonServingConfig = BaseServingConfigTritonServingConfig
93
+ & OneOf<{ config: ServingConfigTritonModelConfig; vllm: ServingConfigVLLM }>
94
+
95
+
96
+ /* baize modified */ export type BaseServingConfig = {
97
+ name?: string
98
+ version?: string
99
+ modelPath?: string
100
+ }
101
+
102
+ export type ServingConfig = BaseServingConfig
103
+ & OneOf<{ triton: ServingConfigTritonServingConfig }>
104
+
105
+ export type ServiceConfig = {
106
+ serviceType?: ServiceType
107
+ }
108
+
109
+ export type InferenceServing = {
110
+ name?: string
111
+ cluster?: string
112
+ namespace?: string
113
+ models?: ServingConfig[]
114
+ serviceConfig?: ServiceConfig
115
+ replicas?: number
116
+ labels?: {[key: string]: string}
117
+ annotations?: {[key: string]: string}
118
+ framework?: Framework
119
+ podConfig?: BaizeCommonK8s.PodConfig
120
+ status?: InferenceServingStatus
121
+ lastUpdated?: GoogleProtobufTimestamp.Timestamp
122
+ }
123
+
124
+ export type InferenceServingStatusModelStatus = {
125
+ name?: string
126
+ accessPath?: string
127
+ }
128
+
129
+ export type InferenceServingStatus = {
130
+ phase?: InferenceServingStatusPhase
131
+ availableReplicas?: number
132
+ models?: InferenceServingStatusModelStatus[]
133
+ accessBaseUrl?: string
134
+ }
135
+
136
+ export type ServingAuthTritonRestrictedKeyValue = {
137
+ key?: string
138
+ value?: string
139
+ }
140
+
141
+
142
+ /* baize modified */ export type BaseServingAuth = {
143
+ authType?: ServingAuthAuthType
144
+ }
145
+
146
+ export type ServingAuth = BaseServingAuth
147
+ & OneOf<{ tritonRestrictedKeyValue: ServingAuthTritonRestrictedKeyValue }>
148
+
149
+ export type CreateInferenceServingRequest = {
150
+ workspace?: number
151
+ name?: string
152
+ cluster?: string
153
+ namespace?: string
154
+ models?: ServingConfig[]
155
+ serviceConfig?: ServiceConfig
156
+ replicas?: number
157
+ labels?: {[key: string]: string}
158
+ annotations?: {[key: string]: string}
159
+ podConfig?: BaizeCommonK8s.PodConfig
160
+ framework?: Framework
161
+ auth?: ServingAuth
162
+ }
163
+
164
+ export type UpdateInferenceServingRequest = {
165
+ workspace?: number
166
+ name?: string
167
+ cluster?: string
168
+ namespace?: string
169
+ models?: ServingConfig[]
170
+ replicas?: number
171
+ labels?: {[key: string]: string}
172
+ annotations?: {[key: string]: string}
173
+ podConfig?: BaizeCommonK8s.PodConfig
174
+ auth?: ServingAuth
175
+ }
176
+
177
+ export type SingleInferenceServingRequest = {
178
+ name?: string
179
+ cluster?: string
180
+ namespace?: string
181
+ workspace?: number
182
+ }
183
+
184
+ export type ListInferenceServingsRequest = {
185
+ cluster?: string
186
+ namespace?: string
187
+ workspace?: number
188
+ page?: BaizeCommonCommon.Pagination
189
+ }
190
+
191
+ export type ListInferenceServingsResponse = {
192
+ items?: InferenceServing[]
193
+ page?: BaizeCommonCommon.Pagination
194
+ }
195
+
196
+ export class InferenceServingManagement {
197
+ static CreateInferenceServing(req: CreateInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
198
+ return fm.fetchReq<CreateInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving`, {...initReq, method: "POST", body: JSON.stringify(req, fm.replacer)})
199
+ }
200
+ static GetInferenceServing(req: SingleInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
201
+ return fm.fetchReq<SingleInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving/${req["name"]}?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace", "name"])}`, {...initReq, method: "GET"})
202
+ }
203
+ static ListInferenceServings(req: ListInferenceServingsRequest, initReq?: fm.InitReq): Promise<ListInferenceServingsResponse> {
204
+ return fm.fetchReq<ListInferenceServingsRequest, ListInferenceServingsResponse>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace"])}`, {...initReq, method: "GET"})
205
+ }
206
+ static UpdateInferenceServing(req: UpdateInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
207
+ return fm.fetchReq<UpdateInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving/${req["name"]}`, {...initReq, method: "PUT", body: JSON.stringify(req, fm.replacer)})
208
+ }
209
+ static DeleteInferenceServing(req: SingleInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
210
+ return fm.fetchReq<SingleInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving/${req["name"]}`, {...initReq, method: "DELETE", body: JSON.stringify(req, fm.replacer)})
211
+ }
212
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@daocloud-proto/baize",
3
- "version": "v0.103.3",
3
+ "version": "v0.104.1",
4
4
  "description": "",
5
5
  "author": "",
6
6
  "license": "ISC"