@daocloud-proto/baize 0.103.3 → 0.104.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/common/k8s.pb.ts +74 -0
- package/management-api/job/v1alpha1/common.pb.ts +15 -1
- package/management-api/job/v1alpha1/job.pb.ts +2 -0
- package/management-api/pod/v1alpha1/pod.pb.ts +1 -1
- package/management-api/queue/v1alpha1/queue.pb.ts +7 -0
- package/management-api/serving/triton/model_config.pb.ts +368 -0
- package/management-api/serving/v1alpha1/serving.pb.ts +212 -0
- package/package.json +1 -1
package/common/k8s.pb.ts
CHANGED
@@ -25,4 +25,78 @@ export type KubeVolume = {
   name?: string
   mountPath?: string
   readOnly?: boolean
+}
+
+export type PodConfig = {
+  kubeEnvs?: KubeEnv[]
+  kubeVolumes?: KubeVolume[]
+  resources?: Resources
+  affinity?: Affinity
+  schedulerName?: string
+  priorityClass?: string
+  queue?: string
+}
+
+export type Affinity = {
+  nodeAffinity?: NodeAffinity
+  podAffinity?: PodAffinity
+  podAntiAffinity?: PodAntiAffinity
+}
+
+export type PodAntiAffinity = {
+  requiredDuringSchedulingIgnoredDuringExecution?: PodAffinityTerm[]
+  preferredDuringSchedulingIgnoredDuringExecution?: WeightedPodAffinityTerm[]
+}
+
+export type PodAffinityTerm = {
+  labelSelector?: LabelSelector
+  namespaces?: string[]
+  topologyKey?: string
+  namespaceSelector?: LabelSelector
+}
+
+export type LabelSelector = {
+  matchLabels?: {[key: string]: string}
+  matchExpressions?: LabelSelectorRequirement[]
+}
+
+export type WeightedPodAffinityTerm = {
+  weight?: number
+  podAffinityTerm?: PodAffinityTerm
+}
+
+export type PodAffinity = {
+  requiredDuringSchedulingIgnoredDuringExecution?: PodAffinityTerm[]
+  preferredDuringSchedulingIgnoredDuringExecution?: WeightedPodAffinityTerm[]
+}
+
+export type NodeAffinity = {
+  requiredDuringSchedulingIgnoredDuringExecution?: NodeSelector
+  preferredDuringSchedulingIgnoredDuringExecution?: PreferredSchedulingTerm[]
+}
+
+export type NodeSelector = {
+  nodeSelectorTerms?: NodeSelectorTerm[]
+}
+
+export type PreferredSchedulingTerm = {
+  weight?: number
+  preference?: NodeSelectorTerm
+}
+
+export type NodeSelectorTerm = {
+  matchExpressions?: NodeSelectorRequirement[]
+  matchFields?: NodeSelectorRequirement[]
+}
+
+export type NodeSelectorRequirement = {
+  key?: string
+  operator?: string
+  values?: string[]
+}
+
+export type LabelSelectorRequirement = {
+  key?: string
+  operator?: string
+  values?: string[]
 }
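The new affinity types mirror the Kubernetes pod/node affinity API surface. A minimal sketch of how a consumer might populate the new `PodConfig` (the import path follows the package file layout; the scheduler name, queue name, and node label below are placeholders, not values defined by this package):

```ts
import type { PodConfig } from "@daocloud-proto/baize/common/k8s.pb"

// Pin pods to GPU nodes via the new Affinity/NodeAffinity types.
const podConfig: PodConfig = {
  schedulerName: "volcano",   // placeholder scheduler name
  queue: "default",           // placeholder queue name
  affinity: {
    nodeAffinity: {
      requiredDuringSchedulingIgnoredDuringExecution: {
        nodeSelectorTerms: [
          { matchExpressions: [{ key: "nvidia.com/gpu.present", operator: "In", values: ["true"] }] },
        ],
      },
    },
  },
}
```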
package/management-api/job/v1alpha1/common.pb.ts
CHANGED

@@ -6,6 +6,15 @@
 
 import * as BaizeCommonK8s from "../../../common/k8s.pb"
 
+type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
+type OneOf<T> =
+  | { [k in keyof T]?: undefined }
+  | (
+    keyof T extends infer K ?
+      (K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
+        : never)
+    : never);
+
 export enum QueueType {
   QUEUE_TYPE_UNSPECIFIED = "QUEUE_TYPE_UNSPECIFIED",
   KUEUE = "KUEUE",
@@ -32,7 +41,8 @@ export type CommonConfig = {
   description?: string
 }
 
-export type JobCreationBaseConfig = {
+
+/* baize modified */ export type BaseJobCreationBaseConfig = {
   image?: string
   imagePullSecret?: string
   command?: string[]
@@ -41,8 +51,12 @@ export type JobCreationBaseConfig = {
   annotations?: {[key: string]: string}
   kubeEnvs?: BaizeCommonK8s.KubeEnv[]
   kubeVolumes?: BaizeCommonK8s.KubeVolume[]
+  affinity?: BaizeCommonK8s.Affinity
 }
 
+export type JobCreationBaseConfig = BaseJobCreationBaseConfig
+  & OneOf<{ tolerationSeconds: string }>
+
 export type JobRoleDifferenceConfig = {
   replicas?: number
   resources?: BaizeCommonK8s.Resources
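`JobCreationBaseConfig` is now the intersection of the renamed `BaseJobCreationBaseConfig` and a single-member `OneOf`, so `tolerationSeconds` (a string, presumably because it maps an int64 field) may be set or omitted. A sketch, assuming the import path below:

```ts
import type { JobCreationBaseConfig } from "@daocloud-proto/baize/management-api/job/v1alpha1/common.pb"

// With the oneof member set; the image, command, and value are placeholders.
const withToleration: JobCreationBaseConfig = {
  image: "registry.example.com/train:latest",
  command: ["python", "train.py"],
  tolerationSeconds: "300",
}

// Equally valid: omit tolerationSeconds entirely.
const withoutToleration: JobCreationBaseConfig = {
  image: "registry.example.com/train:latest",
}
```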
package/management-api/job/v1alpha1/job.pb.ts
CHANGED

@@ -39,10 +39,12 @@ export enum JobActionRequestAction {
   priorityClass?: string
   runningDuration?: number
   totalResources?: BaizeCommonK8s.Resources
+  affinity?: BaizeCommonK8s.Affinity
 }
 
 export type Job = BaseJob
   & OneOf<{ pytorch: BaizeManagement_apiJobV1alpha1Pytorch.PyTorchJob; tensorflow: BaizeManagement_apiJobV1alpha1Tfjob.TFJob; paddle: BaizeManagement_apiJobV1alpha1Paddle.PaddleJob }>
+  & OneOf<{ tolerationSeconds: string }>
 
 export type ListJobsRequest = {
   workspace?: number
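Because `Job` intersects two `OneOf` groups, at most one framework payload (`pytorch`, `tensorflow`, `paddle`) is populated and the unset members are typed as `undefined`, which allows simple runtime narrowing. A sketch (the import path is an assumption about how the package exposes this file):

```ts
import type { Job } from "@daocloud-proto/baize/management-api/job/v1alpha1/job.pb"

function describeJob(job: Job): string {
  // Exactly one framework member should be present on a well-formed Job.
  const framework = job.pytorch ? "pytorch" : job.tensorflow ? "tensorflow" : job.paddle ? "paddle" : "unknown"
  return `${framework} job, tolerationSeconds=${job.tolerationSeconds ?? "unset"}`
}
```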
package/management-api/pod/v1alpha1/pod.pb.ts
CHANGED

@@ -49,7 +49,7 @@ export type PodInstanceListResponse = {
   page?: BaizeCommonCommon.Pagination
 }
 
-export class
+export class PodsManagement {
   static GetPodInstanceList(req: PodRequest, initReq?: fm.InitReq): Promise<PodInstanceListResponse> {
     return fm.fetchReq<PodRequest, PodInstanceListResponse>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/resources/${req["name"]}/instances?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace", "name"])}`, {...initReq, method: "GET"})
   }
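The previously truncated class declaration now reads `PodsManagement`, so the instance-list endpoint can be called through the generated client. A hedged sketch (field names are read off the URL template; `pathPrefix` is the base-URL option grpc-gateway-ts clients typically accept, and all values are placeholders):

```ts
import { PodsManagement } from "@daocloud-proto/baize/management-api/pod/v1alpha1/pod.pb"

const instances = await PodsManagement.GetPodInstanceList(
  { workspace: 1, cluster: "cluster-a", namespace: "default", name: "notebook-0" },
  { pathPrefix: "https://api.example.com" },
)
```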
package/management-api/queue/v1alpha1/queue.pb.ts
CHANGED

@@ -60,6 +60,10 @@ export type ListQueueResponse = {
   page?: BaizeCommonCommon.Pagination
 }
 
+export type QueueJSON = {
+  data?: string
+}
+
 export type CreateQueueRequest = {
   type?: QueueType
   cluster?: string
@@ -139,6 +143,9 @@ export class QueueManagement {
   static GetQueue(req: GetQueueRequest, initReq?: fm.InitReq): Promise<Queue> {
     return fm.fetchReq<GetQueueRequest, Queue>(`/apis/baize.io/v1alpha1/clusters/${req["cluster"]}/queues/${req["name"]}?${fm.renderURLSearchParams(req, ["cluster", "name"])}`, {...initReq, method: "GET"})
   }
+  static GetQueueByJSON(req: GetQueueRequest, initReq?: fm.InitReq): Promise<QueueJSON> {
+    return fm.fetchReq<GetQueueRequest, QueueJSON>(`/apis/baize.io/v1alpha1/clusters/${req["cluster"]}/queues/${req["name"]}/json?${fm.renderURLSearchParams(req, ["cluster", "name"])}`, {...initReq, method: "GET"})
+  }
   static DeleteQueue(req: DeleteQueueRequest, initReq?: fm.InitReq): Promise<Queue> {
     return fm.fetchReq<DeleteQueueRequest, Queue>(`/apis/baize.io/v1alpha1/clusters/${req["cluster"]}/queues/${req["name"]}`, {...initReq, method: "DELETE", body: JSON.stringify(req, fm.replacer)})
   }
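The new `GetQueueByJSON` endpoint appears to return the queue as an opaque JSON string in `QueueJSON.data` rather than as the structured `Queue` message, so callers parse it themselves. A sketch under the same import and `pathPrefix` assumptions as above:

```ts
import { QueueManagement } from "@daocloud-proto/baize/management-api/queue/v1alpha1/queue.pb"

const res = await QueueManagement.GetQueueByJSON(
  { cluster: "cluster-a", name: "default" },
  { pathPrefix: "https://api.example.com" },
)
// data is an untyped string; its exact shape is not defined by these types.
const rawQueue = res.data ? JSON.parse(res.data) : undefined
```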
package/management-api/serving/triton/model_config.pb.ts
ADDED

@@ -0,0 +1,368 @@
+/* eslint-disable */
+// @ts-nocheck
+/*
+* This file is a generated Typescript file for GRPC Gateway, DO NOT MODIFY
+*/
+
+type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
+type OneOf<T> =
+  | { [k in keyof T]?: undefined }
+  | (
+    keyof T extends infer K ?
+      (K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
+        : never)
+    : never);
+
+export enum DataType {
+  TYPE_INVALID = "TYPE_INVALID",
+  TYPE_BOOL = "TYPE_BOOL",
+  TYPE_UINT8 = "TYPE_UINT8",
+  TYPE_UINT16 = "TYPE_UINT16",
+  TYPE_UINT32 = "TYPE_UINT32",
+  TYPE_UINT64 = "TYPE_UINT64",
+  TYPE_INT8 = "TYPE_INT8",
+  TYPE_INT16 = "TYPE_INT16",
+  TYPE_INT32 = "TYPE_INT32",
+  TYPE_INT64 = "TYPE_INT64",
+  TYPE_FP16 = "TYPE_FP16",
+  TYPE_FP32 = "TYPE_FP32",
+  TYPE_FP64 = "TYPE_FP64",
+  TYPE_STRING = "TYPE_STRING",
+  TYPE_BF16 = "TYPE_BF16",
+}
+
+export enum ModelInstanceGroupKind {
+  KIND_AUTO = "KIND_AUTO",
+  KIND_GPU = "KIND_GPU",
+  KIND_CPU = "KIND_CPU",
+  KIND_MODEL = "KIND_MODEL",
+}
+
+export enum ModelInstanceGroupSecondaryDeviceSecondaryDeviceKind {
+  KIND_NVDLA = "KIND_NVDLA",
+}
+
+export enum ModelInputFormat {
+  FORMAT_NONE = "FORMAT_NONE",
+  FORMAT_NHWC = "FORMAT_NHWC",
+  FORMAT_NCHW = "FORMAT_NCHW",
+}
+
+export enum BatchInputKind {
+  BATCH_ELEMENT_COUNT = "BATCH_ELEMENT_COUNT",
+  BATCH_ACCUMULATED_ELEMENT_COUNT = "BATCH_ACCUMULATED_ELEMENT_COUNT",
+  BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO = "BATCH_ACCUMULATED_ELEMENT_COUNT_WITH_ZERO",
+  BATCH_MAX_ELEMENT_COUNT_AS_SHAPE = "BATCH_MAX_ELEMENT_COUNT_AS_SHAPE",
+  BATCH_ITEM_SHAPE = "BATCH_ITEM_SHAPE",
+  BATCH_ITEM_SHAPE_FLATTEN = "BATCH_ITEM_SHAPE_FLATTEN",
+}
+
+export enum BatchOutputKind {
+  BATCH_SCATTER_WITH_INPUT_SHAPE = "BATCH_SCATTER_WITH_INPUT_SHAPE",
+}
+
+export enum ModelOptimizationPolicyModelPriority {
+  PRIORITY_DEFAULT = "PRIORITY_DEFAULT",
+  PRIORITY_MAX = "PRIORITY_MAX",
+  PRIORITY_MIN = "PRIORITY_MIN",
+}
+
+export enum ModelQueuePolicyTimeoutAction {
+  REJECT = "REJECT",
+  DELAY = "DELAY",
+}
+
+export enum ModelSequenceBatchingControlKind {
+  CONTROL_SEQUENCE_START = "CONTROL_SEQUENCE_START",
+  CONTROL_SEQUENCE_READY = "CONTROL_SEQUENCE_READY",
+  CONTROL_SEQUENCE_END = "CONTROL_SEQUENCE_END",
+  CONTROL_SEQUENCE_CORRID = "CONTROL_SEQUENCE_CORRID",
+}
+
+export type ModelRateLimiterResource = {
+  name?: string
+  global?: boolean
+  count?: number
+}
+
+export type ModelRateLimiter = {
+  resources?: ModelRateLimiterResource[]
+  priority?: number
+}
+
+export type ModelInstanceGroupSecondaryDevice = {
+  kind?: ModelInstanceGroupSecondaryDeviceSecondaryDeviceKind
+  deviceId?: string
+}
+
+export type ModelInstanceGroup = {
+  name?: string
+  kind?: ModelInstanceGroupKind
+  count?: number
+  rateLimiter?: ModelRateLimiter
+  gpus?: number[]
+  secondaryDevices?: ModelInstanceGroupSecondaryDevice[]
+  profile?: string[]
+  passive?: boolean
+  hostPolicy?: string
+}
+
+export type ModelTensorReshape = {
+  shape?: string[]
+}
+
+export type ModelInput = {
+  name?: string
+  dataType?: DataType
+  format?: ModelInputFormat
+  dims?: string[]
+  reshape?: ModelTensorReshape
+  isShapeTensor?: boolean
+  allowRaggedBatch?: boolean
+  optional?: boolean
+}
+
+export type ModelOutput = {
+  name?: string
+  dataType?: DataType
+  dims?: string[]
+  reshape?: ModelTensorReshape
+  labelFilename?: string
+  isShapeTensor?: boolean
+}
+
+export type BatchInput = {
+  kind?: BatchInputKind
+  targetName?: string[]
+  dataType?: DataType
+  sourceInput?: string[]
+}
+
+export type BatchOutput = {
+  targetName?: string[]
+  kind?: BatchOutputKind
+  sourceInput?: string[]
+}
+
+export type ModelVersionPolicyLatest = {
+  numVersions?: number
+}
+
+export type ModelVersionPolicyAll = {
+}
+
+export type ModelVersionPolicySpecific = {
+  versions?: string[]
+}
+
+
+/* baize modified */ export type BaseModelVersionPolicy = {
+}
+
+export type ModelVersionPolicy = BaseModelVersionPolicy
+  & OneOf<{ latest: ModelVersionPolicyLatest; all: ModelVersionPolicyAll; specific: ModelVersionPolicySpecific }>
+
+export type ModelOptimizationPolicyGraph = {
+  level?: number
+}
+
+export type ModelOptimizationPolicyCudaGraphSpecShape = {
+  dim?: string[]
+}
+
+export type ModelOptimizationPolicyCudaGraphSpecLowerBound = {
+  batchSize?: number
+  input?: {[key: string]: ModelOptimizationPolicyCudaGraphSpecShape}
+}
+
+export type ModelOptimizationPolicyCudaGraphSpec = {
+  batchSize?: number
+  input?: {[key: string]: ModelOptimizationPolicyCudaGraphSpecShape}
+  graphLowerBound?: ModelOptimizationPolicyCudaGraphSpecLowerBound
+}
+
+export type ModelOptimizationPolicyCuda = {
+  graphs?: boolean
+  busyWaitEvents?: boolean
+  graphSpec?: ModelOptimizationPolicyCudaGraphSpec[]
+  outputCopyStream?: boolean
+}
+
+export type ModelOptimizationPolicyExecutionAcceleratorsAccelerator = {
+  name?: string
+  parameters?: {[key: string]: string}
+}
+
+export type ModelOptimizationPolicyExecutionAccelerators = {
+  gpuExecutionAccelerator?: ModelOptimizationPolicyExecutionAcceleratorsAccelerator[]
+  cpuExecutionAccelerator?: ModelOptimizationPolicyExecutionAcceleratorsAccelerator[]
+}
+
+export type ModelOptimizationPolicyPinnedMemoryBuffer = {
+  enable?: boolean
+}
+
+export type ModelOptimizationPolicy = {
+  graph?: ModelOptimizationPolicyGraph
+  priority?: ModelOptimizationPolicyModelPriority
+  cuda?: ModelOptimizationPolicyCuda
+  executionAccelerators?: ModelOptimizationPolicyExecutionAccelerators
+  inputPinnedMemory?: ModelOptimizationPolicyPinnedMemoryBuffer
+  outputPinnedMemory?: ModelOptimizationPolicyPinnedMemoryBuffer
+  gatherKernelBufferThreshold?: number
+  eagerBatching?: boolean
+}
+
+export type ModelQueuePolicy = {
+  timeoutAction?: ModelQueuePolicyTimeoutAction
+  defaultTimeoutMicroseconds?: string
+  allowTimeoutOverride?: boolean
+  maxQueueSize?: number
+}
+
+export type ModelDynamicBatching = {
+  preferredBatchSize?: number[]
+  maxQueueDelayMicroseconds?: string
+  preserveOrdering?: boolean
+  priorityLevels?: string
+  defaultPriorityLevel?: string
+  defaultQueuePolicy?: ModelQueuePolicy
+  priorityQueuePolicy?: {[key: string]: ModelQueuePolicy}
+}
+
+export type ModelSequenceBatchingControl = {
+  kind?: ModelSequenceBatchingControlKind
+  int32FalseTrue?: number[]
+  fp32FalseTrue?: number[]
+  boolFalseTrue?: boolean[]
+  dataType?: DataType
+}
+
+export type ModelSequenceBatchingControlInput = {
+  name?: string
+  control?: ModelSequenceBatchingControl[]
+}
+
+
+/* baize modified */ export type BaseModelSequenceBatchingInitialState = {
+  dataType?: DataType
+  dims?: string[]
+  name?: string
+}
+
+export type ModelSequenceBatchingInitialState = BaseModelSequenceBatchingInitialState
+  & OneOf<{ zeroData: boolean; dataFile: string }>
+
+export type ModelSequenceBatchingState = {
+  inputName?: string
+  outputName?: string
+  dataType?: DataType
+  dims?: string[]
+  initialState?: ModelSequenceBatchingInitialState[]
+  useSameBufferForInputOutput?: boolean
+  useGrowableMemory?: boolean
+}
+
+export type ModelSequenceBatchingStrategyDirect = {
+  maxQueueDelayMicroseconds?: string
+  minimumSlotUtilization?: number
+}
+
+export type ModelSequenceBatchingStrategyOldest = {
+  maxCandidateSequences?: number
+  preferredBatchSize?: number[]
+  maxQueueDelayMicroseconds?: string
+  preserveOrdering?: boolean
+}
+
+
+/* baize modified */ export type BaseModelSequenceBatching = {
+  maxSequenceIdleMicroseconds?: string
+  controlInput?: ModelSequenceBatchingControlInput[]
+  state?: ModelSequenceBatchingState[]
+  iterativeSequence?: boolean
+}
+
+export type ModelSequenceBatching = BaseModelSequenceBatching
+  & OneOf<{ direct: ModelSequenceBatchingStrategyDirect; oldest: ModelSequenceBatchingStrategyOldest }>
+
+export type ModelEnsemblingStep = {
+  modelName?: string
+  modelVersion?: string
+  inputMap?: {[key: string]: string}
+  outputMap?: {[key: string]: string}
+  modelNamespace?: string
+}
+
+export type ModelEnsembling = {
+  step?: ModelEnsemblingStep[]
+}
+
+export type ModelParameter = {
+  stringValue?: string
+}
+
+
+/* baize modified */ export type BaseModelWarmupInput = {
+  dataType?: DataType
+  dims?: string[]
+}
+
+export type ModelWarmupInput = BaseModelWarmupInput
+  & OneOf<{ zeroData: boolean; randomData: boolean; inputDataFile: string }>
+
+export type ModelWarmup = {
+  name?: string
+  batchSize?: number
+  inputs?: {[key: string]: ModelWarmupInput}
+  count?: number
+}
+
+export type ModelOperations = {
+  opLibraryFilename?: string[]
+}
+
+export type ModelTransactionPolicy = {
+  decoupled?: boolean
+}
+
+export type ModelRepositoryAgentsAgent = {
+  name?: string
+  parameters?: {[key: string]: string}
+}
+
+export type ModelRepositoryAgents = {
+  agents?: ModelRepositoryAgentsAgent[]
+}
+
+export type ModelResponseCache = {
+  enable?: boolean
+}
+
+
+/* baize modified */ export type BaseModelConfig = {
+  name?: string
+  platform?: string
+  backend?: string
+  runtime?: string
+  versionPolicy?: ModelVersionPolicy
+  maxBatchSize?: number
+  input?: ModelInput[]
+  output?: ModelOutput[]
+  batchInput?: BatchInput[]
+  batchOutput?: BatchOutput[]
+  optimization?: ModelOptimizationPolicy
+  instanceGroup?: ModelInstanceGroup[]
+  defaultModelFilename?: string
+  ccModelFilenames?: {[key: string]: string}
+  metricTags?: {[key: string]: string}
+  parameters?: {[key: string]: ModelParameter}
+  modelWarmup?: ModelWarmup[]
+  modelOperations?: ModelOperations
+  modelTransactionPolicy?: ModelTransactionPolicy
+  modelRepositoryAgents?: ModelRepositoryAgents
+  responseCache?: ModelResponseCache
+}
+
+export type ModelConfig = BaseModelConfig
+  & OneOf<{ dynamicBatching: ModelDynamicBatching; sequenceBatching: ModelSequenceBatching; ensembleScheduling: ModelEnsembling }>
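The new `model_config.pb.ts` mirrors Triton Inference Server's `model_config.proto`, with the scheduling choice expressed as a `OneOf` so at most one of `dynamicBatching`, `sequenceBatching`, or `ensembleScheduling` can be set. A minimal sketch of a config value (all concrete values are illustrative; int64-backed fields such as `dims` and `maxQueueDelayMicroseconds` are strings):

```ts
import { DataType, type ModelConfig } from "@daocloud-proto/baize/management-api/serving/triton/model_config.pb"

const resnetConfig: ModelConfig = {
  name: "resnet50",
  backend: "onnxruntime",
  maxBatchSize: 8,
  input: [{ name: "input__0", dataType: DataType.TYPE_FP32, dims: ["3", "224", "224"] }],
  output: [{ name: "output__0", dataType: DataType.TYPE_FP32, dims: ["1000"] }],
  // OneOf: choosing dynamicBatching leaves sequenceBatching/ensembleScheduling typed as undefined.
  dynamicBatching: { preferredBatchSize: [4, 8], maxQueueDelayMicroseconds: "100" },
}
```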
package/management-api/serving/v1alpha1/serving.pb.ts
ADDED

@@ -0,0 +1,212 @@
+/* eslint-disable */
+// @ts-nocheck
+/*
+* This file is a generated Typescript file for GRPC Gateway, DO NOT MODIFY
+*/
+
+import * as BaizeCommonCommon from "../../../common/common.pb"
+import * as BaizeCommonK8s from "../../../common/k8s.pb"
+import * as fm from "../../../fetch.pb"
+import * as GoogleProtobufTimestamp from "../../../google/protobuf/timestamp.pb"
+import * as BaizeManagement_apiServingTritonModel_config from "../triton/model_config.pb"
+
+type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
+type OneOf<T> =
+  | { [k in keyof T]?: undefined }
+  | (
+    keyof T extends infer K ?
+      (K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
+        : never)
+    : never);
+
+export enum ServiceType {
+  SERVICE_TYPE_UNSPECIFIED = "SERVICE_TYPE_UNSPECIFIED",
+  NODE_PORT = "NODE_PORT",
+  LOAD_BALANCER = "LOAD_BALANCER",
+  CLUSTER_IP = "CLUSTER_IP",
+}
+
+export enum FrameworkType {
+  FRAMEWORK_TYPE_UNSPECIFIED = "FRAMEWORK_TYPE_UNSPECIFIED",
+  FRAMEWORK_TYPE_TRITON = "FRAMEWORK_TYPE_TRITON",
+}
+
+export enum FrameworkTritonBackend {
+  TRITON_BACKEND_UNSPECIFIED = "TRITON_BACKEND_UNSPECIFIED",
+  TRITON_BACKEND_PYTORCH = "TRITON_BACKEND_PYTORCH",
+  TRITON_BACKEND_TENSORFLOW = "TRITON_BACKEND_TENSORFLOW",
+  TRITON_BACKEND_VLLM = "TRITON_BACKEND_VLLM",
+  TRITON_BACKEND_ONNX = "TRITON_BACKEND_ONNX",
+}
+
+export enum InferenceServingStatusPhase {
+  PHASE_UNSPECIFIED = "PHASE_UNSPECIFIED",
+  PENDING = "PENDING",
+  UPDATING_OR_CREATING = "UPDATING_OR_CREATING",
+  RUNNING = "RUNNING",
+  FAILED = "FAILED",
+  DELETING = "DELETING",
+  STOPPED = "STOPPED",
+}
+
+export enum ServingAuthAuthType {
+  AUTH_TYPE_UNSPECIFIED = "AUTH_TYPE_UNSPECIFIED",
+  TRITON_RESTRICTED_KEY = "TRITON_RESTRICTED_KEY",
+}
+
+export type FrameworkTriton = {
+  backend?: FrameworkTritonBackend
+}
+
+
+/* baize modified */ export type BaseFramework = {
+  type?: FrameworkType
+}
+
+export type Framework = BaseFramework
+  & OneOf<{ triton: FrameworkTriton }>
+
+export type Model = {
+  name?: string
+  version?: string
+  modelPath?: string
+}
+
+export type ServingConfigVLLM = {
+  trustRemoteCode?: boolean
+  tensorParallelSize?: number
+}
+
+export type ServingConfigTritonModelConfig = {
+  inputs?: BaizeManagement_apiServingTritonModel_config.ModelInput[]
+  outputs?: BaizeManagement_apiServingTritonModel_config.ModelOutput[]
+  maxBatchSize?: number
+  customConfig?: string
+}
+
+
+/* baize modified */ export type BaseServingConfigTritonServingConfig = {
+  customModelConfig?: {[key: string]: string}
+}
+
+export type ServingConfigTritonServingConfig = BaseServingConfigTritonServingConfig
+  & OneOf<{ config: ServingConfigTritonModelConfig; vllm: ServingConfigVLLM }>
+
+
+/* baize modified */ export type BaseServingConfig = {
+  name?: string
+  version?: string
+  modelPath?: string
+}
+
+export type ServingConfig = BaseServingConfig
+  & OneOf<{ triton: ServingConfigTritonServingConfig }>
+
+export type ServiceConfig = {
+  serviceType?: ServiceType
+}
+
+export type InferenceServing = {
+  name?: string
+  cluster?: string
+  namespace?: string
+  models?: ServingConfig[]
+  serviceConfig?: ServiceConfig
+  replicas?: number
+  labels?: {[key: string]: string}
+  annotations?: {[key: string]: string}
+  framework?: Framework
+  podConfig?: BaizeCommonK8s.PodConfig
+  status?: InferenceServingStatus
+  lastUpdated?: GoogleProtobufTimestamp.Timestamp
+}
+
+export type InferenceServingStatusModelStatus = {
+  name?: string
+  accessPath?: string
+}
+
+export type InferenceServingStatus = {
+  phase?: InferenceServingStatusPhase
+  availableReplicas?: number
+  models?: InferenceServingStatusModelStatus[]
+  accessBaseUrl?: string
+}
+
+export type ServingAuthTritonRestrictedKeyValue = {
+  key?: string
+  value?: string
+}
+
+
+/* baize modified */ export type BaseServingAuth = {
+  authType?: ServingAuthAuthType
+}
+
+export type ServingAuth = BaseServingAuth
+  & OneOf<{ tritonRestrictedKeyValue: ServingAuthTritonRestrictedKeyValue }>
+
+export type CreateInferenceServingRequest = {
+  workspace?: number
+  name?: string
+  cluster?: string
+  namespace?: string
+  models?: ServingConfig[]
+  serviceConfig?: ServiceConfig
+  replicas?: number
+  labels?: {[key: string]: string}
+  annotations?: {[key: string]: string}
+  podConfig?: BaizeCommonK8s.PodConfig
+  framework?: Framework
+  auth?: ServingAuth
+}
+
+export type UpdateInferenceServingRequest = {
+  workspace?: number
+  name?: string
+  cluster?: string
+  namespace?: string
+  models?: ServingConfig[]
+  replicas?: number
+  labels?: {[key: string]: string}
+  annotations?: {[key: string]: string}
+  podConfig?: BaizeCommonK8s.PodConfig
+  auth?: ServingAuth
+}
+
+export type SingleInferenceServingRequest = {
+  name?: string
+  cluster?: string
+  namespace?: string
+  workspace?: number
+}
+
+export type ListInferenceServingsRequest = {
+  cluster?: string
+  namespace?: string
+  workspace?: number
+  page?: BaizeCommonCommon.Pagination
+}
+
+export type ListInferenceServingsResponse = {
+  items?: InferenceServing[]
+  page?: BaizeCommonCommon.Pagination
+}
+
+export class InferenceServingManagement {
+  static CreateInferenceServing(req: CreateInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
+    return fm.fetchReq<CreateInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving`, {...initReq, method: "POST", body: JSON.stringify(req, fm.replacer)})
+  }
+  static GetInferenceServing(req: SingleInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
+    return fm.fetchReq<SingleInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving/${req["name"]}?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace", "name"])}`, {...initReq, method: "GET"})
+  }
+  static ListInferenceServings(req: ListInferenceServingsRequest, initReq?: fm.InitReq): Promise<ListInferenceServingsResponse> {
+    return fm.fetchReq<ListInferenceServingsRequest, ListInferenceServingsResponse>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace"])}`, {...initReq, method: "GET"})
+  }
+  static UpdateInferenceServing(req: UpdateInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
+    return fm.fetchReq<UpdateInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving/${req["name"]}`, {...initReq, method: "PUT", body: JSON.stringify(req, fm.replacer)})
+  }
+  static DeleteInferenceServing(req: SingleInferenceServingRequest, initReq?: fm.InitReq): Promise<InferenceServing> {
+    return fm.fetchReq<SingleInferenceServingRequest, InferenceServing>(`/apis/baize.io/v1alpha1/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/inference-serving/${req["name"]}`, {...initReq, method: "DELETE", body: JSON.stringify(req, fm.replacer)})
+  }
+}
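The new `InferenceServingManagement` client covers the full CRUD surface for inference servings. A sketch of creating a Triton/vLLM serving (the import path, `pathPrefix`, and all concrete values are placeholders, not prescribed by the package):

```ts
import {
  FrameworkType,
  FrameworkTritonBackend,
  InferenceServingManagement,
  ServiceType,
} from "@daocloud-proto/baize/management-api/serving/v1alpha1/serving.pb"

const serving = await InferenceServingManagement.CreateInferenceServing(
  {
    workspace: 1,
    cluster: "cluster-a",
    namespace: "default",
    name: "qwen-demo",
    replicas: 1,
    framework: {
      type: FrameworkType.FRAMEWORK_TYPE_TRITON,
      triton: { backend: FrameworkTritonBackend.TRITON_BACKEND_VLLM },
    },
    serviceConfig: { serviceType: ServiceType.CLUSTER_IP },
    // ServingConfig is BaseServingConfig & OneOf<{ triton }>; the nested OneOf picks vllm over config.
    models: [
      {
        name: "qwen",
        modelPath: "models/qwen",
        triton: { vllm: { tensorParallelSize: 1, trustRemoteCode: true } },
      },
    ],
  },
  { pathPrefix: "https://api.example.com" },
)
console.log(serving.status?.phase)
```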