@daocloud-proto/baize 0.103.3 → 0.104.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/common/k8s.pb.ts CHANGED
@@ -25,4 +25,68 @@ export type KubeVolume = {
25
25
  name?: string
26
26
  mountPath?: string
27
27
  readOnly?: boolean
28
+ }
29
+
30
+ export type Affinity = {
31
+ nodeAffinity?: NodeAffinity
32
+ podAffinity?: PodAffinity
33
+ podAntiAffinity?: PodAntiAffinity
34
+ }
35
+
36
+ export type PodAntiAffinity = {
37
+ requiredDuringSchedulingIgnoredDuringExecution?: PodAffinityTerm[]
38
+ preferredDuringSchedulingIgnoredDuringExecution?: WeightedPodAffinityTerm[]
39
+ }
40
+
41
+ export type PodAffinityTerm = {
42
+ labelSelector?: LabelSelector
43
+ namespaces?: string[]
44
+ topologyKey?: string
45
+ namespaceSelector?: LabelSelector
46
+ }
47
+
48
+ export type LabelSelector = {
49
+ matchLabels?: {[key: string]: string}
50
+ matchExpressions?: LabelSelectorRequirement[]
51
+ }
52
+
53
+ export type WeightedPodAffinityTerm = {
54
+ weight?: number
55
+ podAffinityTerm?: PodAffinityTerm
56
+ }
57
+
58
+ export type PodAffinity = {
59
+ requiredDuringSchedulingIgnoredDuringExecution?: PodAffinityTerm[]
60
+ preferredDuringSchedulingIgnoredDuringExecution?: WeightedPodAffinityTerm[]
61
+ }
62
+
63
+ export type NodeAffinity = {
64
+ requiredDuringSchedulingIgnoredDuringExecution?: NodeSelector
65
+ preferredDuringSchedulingIgnoredDuringExecution?: PreferredSchedulingTerm[]
66
+ }
67
+
68
+ export type NodeSelector = {
69
+ nodeSelectorTerms?: NodeSelectorTerm[]
70
+ }
71
+
72
+ export type PreferredSchedulingTerm = {
73
+ weight?: number
74
+ preference?: NodeSelectorTerm
75
+ }
76
+
77
+ export type NodeSelectorTerm = {
78
+ matchExpressions?: NodeSelectorRequirement[]
79
+ matchFields?: NodeSelectorRequirement[]
80
+ }
81
+
82
+ export type NodeSelectorRequirement = {
83
+ key?: string
84
+ operator?: string
85
+ values?: string[]
86
+ }
87
+
88
+ export type LabelSelectorRequirement = {
89
+ key?: string
90
+ operator?: string
91
+ values?: string[]
28
92
  }
@@ -41,6 +41,8 @@ export type JobCreationBaseConfig = {
41
41
  annotations?: {[key: string]: string}
42
42
  kubeEnvs?: BaizeCommonK8s.KubeEnv[]
43
43
  kubeVolumes?: BaizeCommonK8s.KubeVolume[]
44
+ tolerationSeconds?: string
45
+ podAntiAffinity?: BaizeCommonK8s.Affinity
44
46
  }
45
47
 
46
48
  export type JobRoleDifferenceConfig = {
@@ -39,6 +39,8 @@ export enum JobActionRequestAction {
39
39
  priorityClass?: string
40
40
  runningDuration?: number
41
41
  totalResources?: BaizeCommonK8s.Resources
42
+ tolerationSeconds?: string
43
+ podAntiAffinity?: BaizeCommonK8s.Affinity
42
44
  }
43
45
 
44
46
  export type Job = BaseJob
@@ -49,7 +49,7 @@ export type PodInstanceListResponse = {
49
49
  page?: BaizeCommonCommon.Pagination
50
50
  }
51
51
 
52
- export class podsManagement {
52
+ export class PodsManagement {
53
53
  static GetPodInstanceList(req: PodRequest, initReq?: fm.InitReq): Promise<PodInstanceListResponse> {
54
54
  return fm.fetchReq<PodRequest, PodInstanceListResponse>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/resources/${req["name"]}/instances?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace", "name"])}`, {...initReq, method: "GET"})
55
55
  }
@@ -0,0 +1,368 @@
1
+ /* eslint-disable */
2
+ // @ts-nocheck
3
+ /*
4
+ * This file is a generated Typescript file for GRPC Gateway, DO NOT MODIFY
5
+ */
6
+
7
+ type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
8
+ type OneOf<T> =
9
+ | { [k in keyof T]?: undefined }
10
+ | (
11
+ keyof T extends infer K ?
12
+ (K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
13
+ : never)
14
+ : never);
15
+
16
+ export enum DataType {
17
+ TYPE_INVALID = "TYPE_INVALID",
18
+ TYPE_BOOL = "TYPE_BOOL",
19
+ TYPE_UINT8 = "TYPE_UINT8",
20
+ TYPE_UINT16 = "TYPE_UINT16",
21
+ TYPE_UINT32 = "TYPE_UINT32",
22
+ TYPE_UINT64 = "TYPE_UINT64",
23
+ TYPE_INT8 = "TYPE_INT8",
24
+ TYPE_INT16 = "TYPE_INT16",
25
+ TYPE_INT32 = "TYPE_INT32",
26
+ TYPE_INT64 = "TYPE_INT64",
27
+ TYPE_FP16 = "TYPE_FP16",
28
+ TYPE_FP32 = "TYPE_FP32",
29
+ TYPE_FP64 = "TYPE_FP64",
30
+ TYPE_STRING = "TYPE_STRING",
31
+ TYPE_BF16 = "TYPE_BF16",
32
+ }
33
+
34
+ export enum ModelInstanceGroupKind {
35
+ KIND_AUTO = "KIND_AUTO",
36
+ KIND_GPU = "KIND_GPU",
37
+ KIND_CPU = "KIND_CPU",
38
+ KIND_MODEL = "KIND_MODEL",
39
+ }
40
+
41
+ export enum ModelInstanceGroupSecondaryDeviceSecondaryDeviceKind {
42
+ KIND_NVDLA = "KIND_NVDLA",
43
+ }
44
+
45
+ export enum ModelInputFormat {
46
+ FORMAT_NONE = "FORMAT_NONE",
47
+ FORMAT_NHWC = "FORMAT_NHWC",
48
+ FORMAT_NCHW = "FORMAT_NCHW",
49
+ }
50
+
51
+ export enum BatchInputKind {
52
+ BATCH_ELEMENT_COUNT = "BATCH_ELEMENT_COUNT",
53
+ BATCH_ACCUMULATED_ELEMENT_COUNT = "BATCH_ACCUMULATED_ELEMENT_COUNT",
54
+ BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO",
55
+ BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE",
56
+ BATCH_ITEM_SHAPE = "BATCH_ITEM_SHAPE",
57
+ BATCH_ITEM_SHAPE_FLATTEN = "BATCH_ITEM_SHAPE_FLATTEN",
58
+ }
59
+
60
+ export enum BatchOutputKind {
61
+ BATCH_SCATTER_WITH_INPUT_SHAPE = "BATCH_SCATTER_WITH_INPUT_SHAPE",
62
+ }
63
+
64
+ export enum ModelOptimizationPolicyModelPriority {
65
+ PRIORITY_DEFAULT = "PRIORITY_DEFAULT",
66
+ PRIORITY_MAX = "PRIORITY_MAX",
67
+ PRIORITY_MIN = "PRIORITY_MIN",
68
+ }
69
+
70
+ export enum ModelQueuePolicyTimeoutAction {
71
+ REJECT = "REJECT",
72
+ DELAY = "DELAY",
73
+ }
74
+
75
+ export enum ModelSequenceBatchingControlKind {
76
+ CONTROL_SEQUENCE_START = "CONTROL_SEQUENCE_START",
77
+ CONTROL_SEQUENCE_READY = "CONTROL_SEQUENCE_READY",
78
+ CONTROL_SEQUENCE_END = "CONTROL_SEQUENCE_END",
79
+ CONTROL_SEQUENCE_CORRID = "CONTROL_SEQUENCE_CORRID",
80
+ }
81
+
82
+ export type ModelRateLimiterResource = {
83
+ name?: string
84
+ global?: boolean
85
+ count?: number
86
+ }
87
+
88
+ export type ModelRateLimiter = {
89
+ resources?: ModelRateLimiterResource[]
90
+ priority?: number
91
+ }
92
+
93
+ export type ModelInstanceGroupSecondaryDevice = {
94
+ kind?: ModelInstanceGroupSecondaryDeviceSecondaryDeviceKind
95
+ deviceId?: string
96
+ }
97
+
98
+ export type ModelInstanceGroup = {
99
+ name?: string
100
+ kind?: ModelInstanceGroupKind
101
+ count?: number
102
+ rateLimiter?: ModelRateLimiter
103
+ gpus?: number[]
104
+ secondaryDevices?: ModelInstanceGroupSecondaryDevice[]
105
+ profile?: string[]
106
+ passive?: boolean
107
+ hostPolicy?: string
108
+ }
109
+
110
+ export type ModelTensorReshape = {
111
+ shape?: string[]
112
+ }
113
+
114
+ export type ModelInput = {
115
+ name?: string
116
+ dataType?: DataType
117
+ format?: ModelInputFormat
118
+ dims?: string[]
119
+ reshape?: ModelTensorReshape
120
+ isShapeTensor?: boolean
121
+ allowRaggedBatch?: boolean
122
+ optional?: boolean
123
+ }
124
+
125
+ export type ModelOutput = {
126
+ name?: string
127
+ dataType?: DataType
128
+ dims?: string[]
129
+ reshape?: ModelTensorReshape
130
+ labelFilename?: string
131
+ isShapeTensor?: boolean
132
+ }
133
+
134
+ export type BatchInput = {
135
+ kind?: BatchInputKind
136
+ targetName?: string[]
137
+ dataType?: DataType
138
+ sourceInput?: string[]
139
+ }
140
+
141
+ export type BatchOutput = {
142
+ targetName?: string[]
143
+ kind?: BatchOutputKind
144
+ sourceInput?: string[]
145
+ }
146
+
147
+ export type ModelVersionPolicyLatest = {
148
+ numVersions?: number
149
+ }
150
+
151
+ export type ModelVersionPolicyAll = {
152
+ }
153
+
154
+ export type ModelVersionPolicySpecific = {
155
+ versions?: string[]
156
+ }
157
+
158
+
159
+ /* baize modified */ export type BaseModelVersionPolicy = {
160
+ }
161
+
162
+ export type ModelVersionPolicy = BaseModelVersionPolicy
163
+ & OneOf<{ latest: ModelVersionPolicyLatest; all: ModelVersionPolicyAll; specific: ModelVersionPolicySpecific }>
164
+
165
+ export type ModelOptimizationPolicyGraph = {
166
+ level?: number
167
+ }
168
+
169
+ export type ModelOptimizationPolicyCudaGraphSpecShape = {
170
+ dim?: string[]
171
+ }
172
+
173
+ export type ModelOptimizationPolicyCudaGraphSpecLowerBound = {
174
+ batchSize?: number
175
+ input?: {[key: string]: ModelOptimizationPolicyCudaGraphSpecShape}
176
+ }
177
+
178
+ export type ModelOptimizationPolicyCudaGraphSpec = {
179
+ batchSize?: number
180
+ input?: {[key: string]: ModelOptimizationPolicyCudaGraphSpecShape}
181
+ graphLowerBound?: ModelOptimizationPolicyCudaGraphSpecLowerBound
182
+ }
183
+
184
+ export type ModelOptimizationPolicyCuda = {
185
+ graphs?: boolean
186
+ busyWaitEvents?: boolean
187
+ graphSpec?: ModelOptimizationPolicyCudaGraphSpec[]
188
+ outputCopyStream?: boolean
189
+ }
190
+
191
+ export type ModelOptimizationPolicyExecutionAcceleratorsAccelerator = {
192
+ name?: string
193
+ parameters?: {[key: string]: string}
194
+ }
195
+
196
+ export type ModelOptimizationPolicyExecutionAccelerators = {
197
+ gpuExecutionAccelerator?: ModelOptimizationPolicyExecutionAcceleratorsAccelerator[]
198
+ cpuExecutionAccelerator?: ModelOptimizationPolicyExecutionAcceleratorsAccelerator[]
199
+ }
200
+
201
+ export type ModelOptimizationPolicyPinnedMemoryBuffer = {
202
+ enable?: boolean
203
+ }
204
+
205
+ export type ModelOptimizationPolicy = {
206
+ graph?: ModelOptimizationPolicyGraph
207
+ priority?: ModelOptimizationPolicyModelPriority
208
+ cuda?: ModelOptimizationPolicyCuda
209
+ executionAccelerators?: ModelOptimizationPolicyExecutionAccelerators
210
+ inputPinnedMemory?: ModelOptimizationPolicyPinnedMemoryBuffer
211
+ outputPinnedMemory?: ModelOptimizationPolicyPinnedMemoryBuffer
212
+ gatherKernelBufferThreshold?: number
213
+ eagerBatching?: boolean
214
+ }
215
+
216
+ export type ModelQueuePolicy = {
217
+ timeoutAction?: ModelQueuePolicyTimeoutAction
218
+ defaultTimeoutMicroseconds?: string
219
+ allowTimeoutOverride?: boolean
220
+ maxQueueSize?: number
221
+ }
222
+
223
+ export type ModelDynamicBatching = {
224
+ preferredBatchSize?: number[]
225
+ maxQueueDelayMicroseconds?: string
226
+ preserveOrdering?: boolean
227
+ priorityLevels?: string
228
+ defaultPriorityLevel?: string
229
+ defaultQueuePolicy?: ModelQueuePolicy
230
+ priorityQueuePolicy?: {[key: string]: ModelQueuePolicy}
231
+ }
232
+
233
+ export type ModelSequenceBatchingControl = {
234
+ kind?: ModelSequenceBatchingControlKind
235
+ int32FalseTrue?: number[]
236
+ fp32FalseTrue?: number[]
237
+ boolFalseTrue?: boolean[]
238
+ dataType?: DataType
239
+ }
240
+
241
+ export type ModelSequenceBatchingControlInput = {
242
+ name?: string
243
+ control?: ModelSequenceBatchingControl[]
244
+ }
245
+
246
+
247
+ /* baize modified */ export type BaseModelSequenceBatchingInitialState = {
248
+ dataType?: DataType
249
+ dims?: string[]
250
+ name?: string
251
+ }
252
+
253
+ export type ModelSequenceBatchingInitialState = BaseModelSequenceBatchingInitialState
254
+ & OneOf<{ zeroData: boolean; dataFile: string }>
255
+
256
+ export type ModelSequenceBatchingState = {
257
+ inputName?: string
258
+ outputName?: string
259
+ dataType?: DataType
260
+ dims?: string[]
261
+ initialState?: ModelSequenceBatchingInitialState[]
262
+ useSameBufferForInputOutput?: boolean
263
+ useGrowableMemory?: boolean
264
+ }
265
+
266
+ export type ModelSequenceBatchingStrategyDirect = {
267
+ maxQueueDelayMicroseconds?: string
268
+ minimumSlotUtilization?: number
269
+ }
270
+
271
+ export type ModelSequenceBatchingStrategyOldest = {
272
+ maxCandidateSequences?: number
273
+ preferredBatchSize?: number[]
274
+ maxQueueDelayMicroseconds?: string
275
+ preserveOrdering?: boolean
276
+ }
277
+
278
+
279
+ /* baize modified */ export type BaseModelSequenceBatching = {
280
+ maxSequenceIdleMicroseconds?: string
281
+ controlInput?: ModelSequenceBatchingControlInput[]
282
+ state?: ModelSequenceBatchingState[]
283
+ iterativeSequence?: boolean
284
+ }
285
+
286
+ export type ModelSequenceBatching = BaseModelSequenceBatching
287
+ & OneOf<{ direct: ModelSequenceBatchingStrategyDirect; oldest: ModelSequenceBatchingStrategyOldest }>
288
+
289
+ export type ModelEnsemblingStep = {
290
+ modelName?: string
291
+ modelVersion?: string
292
+ inputMap?: {[key: string]: string}
293
+ outputMap?: {[key: string]: string}
294
+ modelNamespace?: string
295
+ }
296
+
297
+ export type ModelEnsembling = {
298
+ step?: ModelEnsemblingStep[]
299
+ }
300
+
301
+ export type ModelParameter = {
302
+ stringValue?: string
303
+ }
304
+
305
+
306
+ /* baize modified */ export type BaseModelWarmupInput = {
307
+ dataType?: DataType
308
+ dims?: string[]
309
+ }
310
+
311
+ export type ModelWarmupInput = BaseModelWarmupInput
312
+ & OneOf<{ zeroData: boolean; randomData: boolean; inputDataFile: string }>
313
+
314
+ export type ModelWarmup = {
315
+ name?: string
316
+ batchSize?: number
317
+ inputs?: {[key: string]: ModelWarmupInput}
318
+ count?: number
319
+ }
320
+
321
+ export type ModelOperations = {
322
+ opLibraryFilename?: string[]
323
+ }
324
+
325
+ export type ModelTransactionPolicy = {
326
+ decoupled?: boolean
327
+ }
328
+
329
+ export type ModelRepositoryAgentsAgent = {
330
+ name?: string
331
+ parameters?: {[key: string]: string}
332
+ }
333
+
334
+ export type ModelRepositoryAgents = {
335
+ agents?: ModelRepositoryAgentsAgent[]
336
+ }
337
+
338
+ export type ModelResponseCache = {
339
+ enable?: boolean
340
+ }
341
+
342
+
343
+ /* baize modified */ export type BaseModelConfig = {
344
+ name?: string
345
+ platform?: string
346
+ backend?: string
347
+ runtime?: string
348
+ versionPolicy?: ModelVersionPolicy
349
+ maxBatchSize?: number
350
+ input?: ModelInput[]
351
+ output?: ModelOutput[]
352
+ batchInput?: BatchInput[]
353
+ batchOutput?: BatchOutput[]
354
+ optimization?: ModelOptimizationPolicy
355
+ instanceGroup?: ModelInstanceGroup[]
356
+ defaultModelFilename?: string
357
+ ccModelFilenames?: {[key: string]: string}
358
+ metricTags?: {[key: string]: string}
359
+ parameters?: {[key: string]: ModelParameter}
360
+ modelWarmup?: ModelWarmup[]
361
+ modelOperations?: ModelOperations
362
+ modelTransactionPolicy?: ModelTransactionPolicy
363
+ modelRepositoryAgents?: ModelRepositoryAgents
364
+ responseCache?: ModelResponseCache
365
+ }
366
+
367
+ export type ModelConfig = BaseModelConfig
368
+ & OneOf<{ dynamicBatching: ModelDynamicBatching; sequenceBatching: ModelSequenceBatching; ensembleScheduling: ModelEnsembling }>
@@ -0,0 +1,179 @@
1
+ /* eslint-disable */
2
+ // @ts-nocheck
3
+ /*
4
+ * This file is a generated Typescript file for GRPC Gateway, DO NOT MODIFY
5
+ */
6
+
7
+ import * as BaizeCommonCommon from "../../../common/common.pb"
8
+ import * as BaizeCommonK8s from "../../../common/k8s.pb"
9
+ import * as fm from "../../../fetch.pb"
10
+ import * as BaizeManagement_apiServingTritonModel_config from "../triton/model_config.pb"
11
+
12
+ type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
13
+ type OneOf<T> =
14
+ | { [k in keyof T]?: undefined }
15
+ | (
16
+ keyof T extends infer K ?
17
+ (K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
18
+ : never)
19
+ : never);
20
+
21
+ export enum FrameworkType {
22
+ FRAMEWORK_TYPE_UNSPECIFIED = "FRAMEWORK_TYPE_UNSPECIFIED",
23
+ FRAMEWORK_TYPE_TRITON = "FRAMEWORK_TYPE_TRITON",
24
+ }
25
+
26
+ export enum FrameworkTritonBackend {
27
+ TRITON_BACKEND_UNSPECIFIED = "TRITON_BACKEND_UNSPECIFIED",
28
+ TRITON_BACKEND_PYTORCH = "TRITON_BACKEND_PYTORCH",
29
+ TRITON_BACKEND_TENSORFLOW = "TRITON_BACKEND_TENSORFLOW",
30
+ TRITON_BACKEND_VLLM = "TRITON_BACKEND_VLLM",
31
+ TRITON_BACKEND_ONNX = "TRITON_BACKEND_ONNX",
32
+ }
33
+
34
+ export enum InferenceServingServiceType {
35
+ SERVICE_TYPE_UNSPECIFIED = "SERVICE_TYPE_UNSPECIFIED",
36
+ NODE_PORT = "NODE_PORT",
37
+ LOAD_BALANCER = "LOAD_BALANCER",
38
+ CLUSTER_IP = "CLUSTER_IP",
39
+ }
40
+
41
+ export enum InferenceServingStatusPhase {
42
+ PHASE_UNSPECIFIED = "PHASE_UNSPECIFIED",
43
+ PENDING = "PENDING",
44
+ UPDATING_OR_CREATING = "UPDATING_OR_CREATING",
45
+ RUNNING = "RUNNING",
46
+ FAILED = "FAILED",
47
+ }
48
+
49
+ export enum CreateInferenceServingRequestServiceType {
50
+ SERVICE_TYPE_UNSPECIFIED = "SERVICE_TYPE_UNSPECIFIED",
51
+ NODE_PORT = "NODE_PORT",
52
+ LOAD_BALANCER = "LOAD_BALANCER",
53
+ CLUSTER_IP = "CLUSTER_IP",
54
+ }
55
+
56
+ export type FrameworkTriton = {
57
+ secretRef?: string
58
+ backend?: FrameworkTritonBackend
59
+ }
60
+
61
+
62
+ /* baize modified */ export type BaseFramework = {
63
+ type?: FrameworkType
64
+ }
65
+
66
+ export type Framework = BaseFramework
67
+ & OneOf<{ triton: FrameworkTriton }>
68
+
69
+ export type Model = {
70
+ name?: string
71
+ version?: string
72
+ modelPath?: string
73
+ }
74
+
75
+ export type ServingConfigVLLM = {
76
+ trustRemoteCode?: boolean
77
+ tensorParallelSize?: number
78
+ }
79
+
80
+ export type ServingConfigTritonModelConfig = {
81
+ inputs?: BaizeManagement_apiServingTritonModel_config.ModelInput[]
82
+ outputs?: BaizeManagement_apiServingTritonModel_config.ModelOutput[]
83
+ maxBatchSize?: number
84
+ customConfig?: string
85
+ }
86
+
87
+
88
+ /* baize modified */ export type BaseServingConfigTritonServingConfig = {
89
+ customModelConfig?: {[key: string]: string}
90
+ }
91
+
92
+ export type ServingConfigTritonServingConfig = BaseServingConfigTritonServingConfig
93
+ & OneOf<{ config: ServingConfigTritonModelConfig; vllm: ServingConfigVLLM }>
94
+
95
+
96
+ /* baize modified */ export type BaseServingConfig = {
97
+ }
98
+
99
+ export type ServingConfig = BaseServingConfig
100
+ & OneOf<{ triton: ServingConfigTritonServingConfig }>
101
+
102
+ export type InferenceServingServiceConfig = {
103
+ serviceType?: InferenceServingServiceType
104
+ }
105
+
106
+ export type InferenceServing = {
107
+ name?: string
108
+ cluster?: string
109
+ namespace?: string
110
+ models?: ServingConfig[]
111
+ kubeEnvs?: BaizeCommonK8s.KubeEnv[]
112
+ kubeVolumes?: BaizeCommonK8s.KubeVolume[]
113
+ serviceConfig?: InferenceServingServiceConfig
114
+ replicas?: number
115
+ status?: InferenceServingStatus
116
+ }
117
+
118
+ export type InferenceServingStatusModelStatus = {
119
+ name?: string
120
+ accessPath?: string
121
+ }
122
+
123
+ export type InferenceServingStatus = {
124
+ phase?: InferenceServingStatusPhase
125
+ availableReplicas?: number
126
+ models?: InferenceServingStatusModelStatus[]
127
+ accessBaseUrl?: string
128
+ }
129
+
130
+ export type CreateInferenceServingRequestServiceConfig = {
131
+ serviceType?: CreateInferenceServingRequestServiceType
132
+ }
133
+
134
+ export type CreateInferenceServingRequest = {
135
+ workspace?: number
136
+ name?: string
137
+ cluster?: string
138
+ namespace?: string
139
+ models?: ServingConfig[]
140
+ kubeEnvs?: BaizeCommonK8s.KubeEnv[]
141
+ kubeVolumes?: BaizeCommonK8s.KubeVolume[]
142
+ serviceConfig?: CreateInferenceServingRequestServiceConfig
143
+ replicas?: number
144
+ }
145
+
146
+ export type SingleInferenceServingRequest = {
147
+ name?: string
148
+ cluster?: string
149
+ namespace?: string
150
+ workspace?: number
151
+ }
152
+
153
+ export type ListInferenceServingsRequest = {
154
+ name?: string
155
+ cluster?: string
156
+ namespace?: string
157
+ workspace?: number
158
+ page?: BaizeCommonCommon.Pagination
159
+ }
160
+
161
+ export type ListInferenceServingsResponse = {
162
+ items?: InferenceServing[]
163
+ page?: BaizeCommonCommon.Pagination
164
+ }
165
+
166
+ export class InferenceServingManagement {
167
+ static CreateInferenceServing(req: CreateInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
168
+ return fm.fetchReq<CreateInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving`, {...initReq, method: "POST", body: JSON.stringify(req, fm.replacer)})
169
+ }
170
+ static GetInferenceServing(req: SingleInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
171
+ return fm.fetchReq<SingleInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving/${req["name"]}?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace", "name"])}`, {...initReq, method: "GET"})
172
+ }
173
+ static ListInferenceServings(req: ListInferenceServingsRequest, initReq?: fm.InitReq): Promise<ListInferenceServingsResponse> {
174
+ return fm.fetchReq<ListInferenceServingsRequest, ListInferenceServingsResponse>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace"])}`, {...initReq, method: "GET"})
175
+ }
176
+ static DeleteInferenceServing(req: SingleInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
177
+ return fm.fetchReq<SingleInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving/${req["name"]}`, {...initReq, method: "DELETE", body: JSON.stringify(req, fm.replacer)})
178
+ }
179
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@daocloud-proto/baize",
3
- "version": "v0.103.3",
3
+ "version": "v0.104.0",
4
4
  "description": "",
5
5
  "author": "",
6
6
  "license": "ISC"