@daocloud-proto/baize 0.127.0 → 0.127.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/* eslint-disable */
|
|
2
|
+
// @ts-nocheck
|
|
3
|
+
/*
|
|
4
|
+
* This file is a generated Typescript file for GRPC Gateway, DO NOT MODIFY
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import * as BaizeCommonK8s from "../../../common/k8s.pb"
|
|
8
|
+
import * as BaizeManagement_apiImageV1alpha1Image from "../../image/v1alpha1/image.pb"
|
|
9
|
+
|
|
10
|
+
export enum QueueType {
|
|
11
|
+
QUEUE_TYPE_UNSPECIFIED = "QUEUE_TYPE_UNSPECIFIED",
|
|
12
|
+
KUEUE = "KUEUE",
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
export enum PlacementPolicy {
|
|
16
|
+
PLACEMENT_POLICY_UNSPECIFIED = "PLACEMENT_POLICY_UNSPECIFIED",
|
|
17
|
+
BALANCED = "BALANCED",
|
|
18
|
+
GROUPED = "GROUPED",
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
export enum TrainingMode {
|
|
22
|
+
TRAINING_MODE_UNSPECIFIED = "TRAINING_MODE_UNSPECIFIED",
|
|
23
|
+
SINGLE = "SINGLE",
|
|
24
|
+
DISTRIBUTED = "DISTRIBUTED",
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export enum JobPhase {
|
|
28
|
+
JOB_PHASE_UNSPECIFIED = "JOB_PHASE_UNSPECIFIED",
|
|
29
|
+
CREATED = "CREATED",
|
|
30
|
+
RUNNING = "RUNNING",
|
|
31
|
+
FAILED = "FAILED",
|
|
32
|
+
SUCCEEDED = "SUCCEEDED",
|
|
33
|
+
SUSPENDED = "SUSPENDED",
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export enum JobRole {
|
|
37
|
+
JOB_ROLE_UNSPECIFIED = "JOB_ROLE_UNSPECIFIED",
|
|
38
|
+
MASTER = "MASTER",
|
|
39
|
+
WORKER = "WORKER",
|
|
40
|
+
TF_CHIEF = "TF_CHIEF",
|
|
41
|
+
TF_PS = "TF_PS",
|
|
42
|
+
TF_WORKER = "TF_WORKER",
|
|
43
|
+
TF_EVALUATOR = "TF_EVALUATOR",
|
|
44
|
+
PD_MASTER = "PD_MASTER",
|
|
45
|
+
PD_WORKER = "PD_WORKER",
|
|
46
|
+
MX_SCHEDULER = "MX_SCHEDULER",
|
|
47
|
+
MX_SERVER = "MX_SERVER",
|
|
48
|
+
MX_WORKER = "MX_WORKER",
|
|
49
|
+
MPI_LAUNCHER = "MPI_LAUNCHER",
|
|
50
|
+
MPI_WORKER = "MPI_WORKER",
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
export type CommonConfig = {
|
|
54
|
+
queueType?: QueueType
|
|
55
|
+
taskQueue?: string
|
|
56
|
+
description?: string
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
export type JobCreationBaseConfig = {
|
|
60
|
+
image?: string
|
|
61
|
+
imagePullSecret?: string
|
|
62
|
+
command?: string[]
|
|
63
|
+
args?: string[]
|
|
64
|
+
podConfig?: BaizeCommonK8s.PodConfig
|
|
65
|
+
labels?: {[key: string]: string}
|
|
66
|
+
annotations?: {[key: string]: string}
|
|
67
|
+
workingDir?: string
|
|
68
|
+
shmSize?: number
|
|
69
|
+
noOverrideEnvPath?: boolean
|
|
70
|
+
imageConfig?: BaizeManagement_apiImageV1alpha1Image.ImageConfig
|
|
71
|
+
preflight?: boolean
|
|
72
|
+
placement?: PlacementPolicy
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
export type JobRoleDifferenceConfig = {
|
|
76
|
+
replicas?: number
|
|
77
|
+
resources?: BaizeCommonK8s.Resources
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
export type RecoveryConfig = {
|
|
81
|
+
enable?: boolean
|
|
82
|
+
}
|
|
@@ -5,97 +5,208 @@
|
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
import * as BaizeCommonCommon from "../../../common/common.pb"
|
|
8
|
+
import * as BaizeCommonK8s from "../../../common/k8s.pb"
|
|
8
9
|
import * as fm from "../../../fetch.pb"
|
|
9
|
-
import * as
|
|
10
|
-
import * as
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
10
|
+
import * as GoogleProtobufStruct from "../../../google/protobuf/struct.pb"
|
|
11
|
+
import * as GoogleProtobufTimestamp from "../../../google/protobuf/timestamp.pb"
|
|
12
|
+
import * as BaizeManagement_apiAnalysisV1alpha1Analysis from "../../analysis/v1alpha1/analysis.pb"
|
|
13
|
+
import * as BaizeManagement_apiEventV1alpha1Event from "../../event/v1alpha1/event.pb"
|
|
14
|
+
import * as BaizeManagement_apiJobV1alpha2Common from "./common.pb"
|
|
15
|
+
|
|
16
|
+
type Absent<T, K extends keyof T> = { [k in Exclude<keyof T, K>]?: undefined };
|
|
17
|
+
type OneOf<T> =
|
|
18
|
+
| { [k in keyof T]?: undefined }
|
|
19
|
+
| (
|
|
20
|
+
keyof T extends infer K ?
|
|
21
|
+
(K extends string & keyof T ? { [k in K]: T[K] } & Absent<T, K>
|
|
22
|
+
: never)
|
|
23
|
+
: never);
|
|
24
|
+
|
|
25
|
+
export enum JobType {
|
|
26
|
+
JOB_TYPE_UNSPECIFIED = "JOB_TYPE_UNSPECIFIED",
|
|
27
|
+
PYTORCH = "PYTORCH",
|
|
28
|
+
TENSORFLOW = "TENSORFLOW",
|
|
29
|
+
PADDLE = "PADDLE",
|
|
30
|
+
MPI = "MPI",
|
|
31
|
+
MXNET = "MXNET",
|
|
26
32
|
}
|
|
27
33
|
|
|
28
|
-
export
|
|
34
|
+
export enum RestartPolicy {
|
|
35
|
+
RESTART_POLICY_UNSPECIFIED = "RESTART_POLICY_UNSPECIFIED",
|
|
36
|
+
RESTART_POLICY_NEVER = "RESTART_POLICY_NEVER",
|
|
37
|
+
RESTART_POLICY_ON_FAILURE = "RESTART_POLICY_ON_FAILURE",
|
|
29
38
|
}
|
|
30
39
|
|
|
31
|
-
export
|
|
40
|
+
export enum SuspendReason {
|
|
41
|
+
SUSPEND_REASON_UNSPECIFIED = "SUSPEND_REASON_UNSPECIFIED",
|
|
42
|
+
SUSPEND_REASON_USER_ACTION = "SUSPEND_REASON_USER_ACTION",
|
|
32
43
|
}
|
|
33
44
|
|
|
34
|
-
export
|
|
35
|
-
|
|
36
|
-
|
|
45
|
+
export enum ImagePullPolicy {
|
|
46
|
+
IMAGE_PULL_POLICY_UNSPECIFIED = "IMAGE_PULL_POLICY_UNSPECIFIED",
|
|
47
|
+
IMAGE_PULL_POLICY_ALWAYS = "IMAGE_PULL_POLICY_ALWAYS",
|
|
48
|
+
IMAGE_PULL_POLICY_NEVER = "IMAGE_PULL_POLICY_NEVER",
|
|
49
|
+
IMAGE_PULL_POLICY_IF_NOT_PRESENT = "IMAGE_PULL_POLICY_IF_NOT_PRESENT",
|
|
37
50
|
}
|
|
38
51
|
|
|
39
|
-
export
|
|
40
|
-
|
|
52
|
+
export enum JobActionRequestAction {
|
|
53
|
+
JOB_ACTION_UNSPECIFIED = "JOB_ACTION_UNSPECIFIED",
|
|
54
|
+
RESTART = "RESTART",
|
|
55
|
+
CHANGE_PRIORITY = "CHANGE_PRIORITY",
|
|
56
|
+
SUSPEND = "SUSPEND",
|
|
57
|
+
RESUME = "RESUME",
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
export enum JobSchedulersResponseSchedulerFeature {
|
|
61
|
+
FEATURE_UNSPECIFIED = "FEATURE_UNSPECIFIED",
|
|
62
|
+
BINPACK = "BINPACK",
|
|
63
|
+
GANG = "GANG",
|
|
64
|
+
FAIR = "FAIR",
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
export type Job = {
|
|
68
|
+
type?: JobType
|
|
41
69
|
cluster?: string
|
|
42
70
|
namespace?: string
|
|
43
71
|
name?: string
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
suspend?: boolean
|
|
72
|
+
trainingMode?: BaizeManagement_apiJobV1alpha2Common.TrainingMode
|
|
73
|
+
creationTimestamp?: GoogleProtobufTimestamp.Timestamp
|
|
74
|
+
jobSpec?: GoogleProtobufStruct.Struct
|
|
48
75
|
description?: string
|
|
76
|
+
baseConfig?: BaizeManagement_apiJobV1alpha2Common.JobCreationBaseConfig
|
|
77
|
+
roleConfig?: {[key: string]: BaizeManagement_apiJobV1alpha2Common.JobRoleDifferenceConfig}
|
|
78
|
+
phase?: BaizeManagement_apiJobV1alpha2Common.JobPhase
|
|
79
|
+
runningDuration?: number
|
|
80
|
+
totalResources?: BaizeCommonK8s.Resources
|
|
81
|
+
analysis?: BaizeManagement_apiAnalysisV1alpha1Analysis.AnalysisConfig
|
|
82
|
+
trainingConfig?: TrainingConfig
|
|
83
|
+
recoveryConfig?: BaizeManagement_apiJobV1alpha2Common.RecoveryConfig
|
|
84
|
+
cleanCheckpointConfig?: CleanCheckpointConfig
|
|
85
|
+
imagePullPolicy?: ImagePullPolicy
|
|
86
|
+
suspendReason?: SuspendReason
|
|
87
|
+
uid?: string
|
|
49
88
|
}
|
|
50
89
|
|
|
51
|
-
export type
|
|
90
|
+
export type ListJobsRequest = {
|
|
91
|
+
workspace?: number
|
|
92
|
+
type?: JobType
|
|
93
|
+
cluster?: string
|
|
94
|
+
namespace?: string
|
|
95
|
+
page?: BaizeCommonCommon.Pagination
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
export type ListJobsResponse = {
|
|
99
|
+
items?: Job[]
|
|
100
|
+
page?: BaizeCommonCommon.Pagination
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
export type ListRdmasRequest = {
|
|
52
104
|
workspace?: number
|
|
53
105
|
cluster?: string
|
|
54
106
|
namespace?: string
|
|
107
|
+
page?: BaizeCommonCommon.Pagination
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
export type Rdma = {
|
|
55
111
|
name?: string
|
|
56
|
-
runtimeRef?: RuntimeRef
|
|
57
|
-
initializer?: Initializer
|
|
58
|
-
trainer?: Trainer
|
|
59
|
-
suspend?: boolean
|
|
60
|
-
description?: string
|
|
61
112
|
}
|
|
62
113
|
|
|
63
|
-
export type
|
|
114
|
+
export type ListRdmasResponse = {
|
|
115
|
+
items?: Rdma[]
|
|
116
|
+
page?: BaizeCommonCommon.Pagination
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
/* baize modified */ export type BaseSingleJobRequest = {
|
|
64
121
|
workspace?: number
|
|
122
|
+
type?: JobType
|
|
65
123
|
cluster?: string
|
|
66
124
|
namespace?: string
|
|
67
125
|
name?: string
|
|
68
126
|
}
|
|
69
127
|
|
|
70
|
-
export type
|
|
128
|
+
export type SingleJobRequest = BaseSingleJobRequest
|
|
129
|
+
& OneOf<{ deleteAnalysis: boolean }>
|
|
130
|
+
|
|
131
|
+
export type CreateJobRequest = {
|
|
71
132
|
workspace?: number
|
|
133
|
+
type?: JobType
|
|
72
134
|
cluster?: string
|
|
73
135
|
namespace?: string
|
|
74
136
|
name?: string
|
|
137
|
+
description?: string
|
|
138
|
+
baseConfig?: BaizeManagement_apiJobV1alpha2Common.JobCreationBaseConfig
|
|
139
|
+
trainingMode?: BaizeManagement_apiJobV1alpha2Common.TrainingMode
|
|
140
|
+
roleConfig?: {[key: string]: BaizeManagement_apiJobV1alpha2Common.JobRoleDifferenceConfig}
|
|
141
|
+
analysis?: BaizeManagement_apiAnalysisV1alpha1Analysis.AnalysisConfig
|
|
142
|
+
trainingConfig?: TrainingConfig
|
|
143
|
+
recoveryConfig?: BaizeManagement_apiJobV1alpha2Common.RecoveryConfig
|
|
144
|
+
cleanCheckpointConfig?: CleanCheckpointConfig
|
|
145
|
+
imagePullPolicy?: ImagePullPolicy
|
|
75
146
|
}
|
|
76
147
|
|
|
77
|
-
export type
|
|
148
|
+
export type CleanCheckpointConfig = {
|
|
149
|
+
cronExpr?: string
|
|
150
|
+
dirs?: string[]
|
|
151
|
+
retained?: number
|
|
152
|
+
timezone?: string
|
|
153
|
+
whitelist?: string
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
export type TrainingConfig = {
|
|
157
|
+
restartPolicy?: RestartPolicy
|
|
158
|
+
maxRetries?: number
|
|
159
|
+
maxTrainingDuration?: string
|
|
160
|
+
rdmaEnabled?: boolean
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
/* baize modified */ export type BaseJobActionRequestParams = {
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
export type JobActionRequestParams = BaseJobActionRequestParams
|
|
168
|
+
& OneOf<{ priorityClass: string }>
|
|
169
|
+
|
|
170
|
+
export type JobActionRequest = {
|
|
78
171
|
workspace?: number
|
|
172
|
+
type?: JobType
|
|
79
173
|
cluster?: string
|
|
80
174
|
namespace?: string
|
|
81
|
-
|
|
175
|
+
name?: string
|
|
176
|
+
action?: JobActionRequestAction
|
|
177
|
+
params?: JobActionRequestParams
|
|
82
178
|
}
|
|
83
179
|
|
|
84
|
-
export type
|
|
180
|
+
export type JobSchedulersResponseScheduler = {
|
|
181
|
+
default?: boolean
|
|
182
|
+
name?: string
|
|
183
|
+
alias?: string
|
|
184
|
+
enabledFeatures?: JobSchedulersResponseSchedulerFeature[]
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
export type JobSchedulersResponse = {
|
|
188
|
+
items?: JobSchedulersResponseScheduler[]
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
export type ListJobEventsRequest = {
|
|
85
192
|
workspace?: number
|
|
193
|
+
type?: JobType
|
|
86
194
|
cluster?: string
|
|
195
|
+
namespace?: string
|
|
196
|
+
name?: string
|
|
87
197
|
page?: BaizeCommonCommon.Pagination
|
|
88
198
|
}
|
|
89
199
|
|
|
90
|
-
export type
|
|
200
|
+
export type ListRuntimesRequest = {
|
|
91
201
|
workspace?: number
|
|
92
202
|
cluster?: string
|
|
93
203
|
namespace?: string
|
|
94
204
|
page?: BaizeCommonCommon.Pagination
|
|
95
205
|
}
|
|
96
206
|
|
|
97
|
-
export type
|
|
98
|
-
|
|
207
|
+
export type ListClusterRuntimesRequest = {
|
|
208
|
+
workspace?: number
|
|
209
|
+
cluster?: string
|
|
99
210
|
page?: BaizeCommonCommon.Pagination
|
|
100
211
|
}
|
|
101
212
|
|
|
@@ -153,17 +264,17 @@ export type CreateClusterRuntimeRequest = {
|
|
|
153
264
|
}
|
|
154
265
|
|
|
155
266
|
export class JobService {
|
|
156
|
-
static CreateJob(req:
|
|
157
|
-
return fm.fetchReq<
|
|
267
|
+
static CreateJob(req: CreateJobRequest, initReq?: fm.InitReq): Promise<Job> {
|
|
268
|
+
return fm.fetchReq<CreateJobRequest, Job>(`/apis/baize.io/v1alpha2/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/jobs`, {...initReq, method: "POST", body: JSON.stringify(req, fm.replacer)})
|
|
158
269
|
}
|
|
159
|
-
static GetJob(req:
|
|
160
|
-
return fm.fetchReq<
|
|
270
|
+
static GetJob(req: SingleJobRequest, initReq?: fm.InitReq): Promise<Job> {
|
|
271
|
+
return fm.fetchReq<SingleJobRequest, Job>(`/apis/baize.io/v1alpha2/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/jobs/${req["name"]}?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace", "name"])}`, {...initReq, method: "GET"})
|
|
161
272
|
}
|
|
162
|
-
static DeleteJob(req:
|
|
163
|
-
return fm.fetchReq<
|
|
273
|
+
static DeleteJob(req: SingleJobRequest, initReq?: fm.InitReq): Promise<Job> {
|
|
274
|
+
return fm.fetchReq<SingleJobRequest, Job>(`/apis/baize.io/v1alpha2/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/jobs/${req["name"]}`, {...initReq, method: "DELETE", body: JSON.stringify(req, fm.replacer)})
|
|
164
275
|
}
|
|
165
|
-
static ListJobs(req:
|
|
166
|
-
return fm.fetchReq<
|
|
276
|
+
static ListJobs(req: ListJobsRequest, initReq?: fm.InitReq): Promise<ListJobsResponse> {
|
|
277
|
+
return fm.fetchReq<ListJobsRequest, ListJobsResponse>(`/apis/baize.io/v1alpha2/workspaces/${req["workspace"]}/jobs?${fm.renderURLSearchParams(req, ["workspace"])}`, {...initReq, method: "GET"})
|
|
167
278
|
}
|
|
168
279
|
static CreateRuntime(req: CreateRuntimeRequest, initReq?: fm.InitReq): Promise<Runtime> {
|
|
169
280
|
return fm.fetchReq<CreateRuntimeRequest, Runtime>(`/apis/baize.io/v1alpha2/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/runtimes`, {...initReq, method: "POST", body: JSON.stringify(req, fm.replacer)})
|
|
@@ -189,7 +300,16 @@ export class JobService {
|
|
|
189
300
|
static DeleteClusterRuntime(req: DeleteClusterRuntimeRequest, initReq?: fm.InitReq): Promise<Runtime> {
|
|
190
301
|
return fm.fetchReq<DeleteClusterRuntimeRequest, Runtime>(`/apis/baize.io/v1alpha2/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/clusterruntimes/${req["name"]}`, {...initReq, method: "DELETE", body: JSON.stringify(req, fm.replacer)})
|
|
191
302
|
}
|
|
192
|
-
static DoJobAction(req:
|
|
193
|
-
return fm.fetchReq<
|
|
303
|
+
static DoJobAction(req: JobActionRequest, initReq?: fm.InitReq): Promise<Job> {
|
|
304
|
+
return fm.fetchReq<JobActionRequest, Job>(`/apis/baize.io/v1alpha2/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/jobs/${req["name"]}/actions`, {...initReq, method: "POST", body: JSON.stringify(req, fm.replacer)})
|
|
305
|
+
}
|
|
306
|
+
static ListSchedulers(req: CreateJobRequest, initReq?: fm.InitReq): Promise<JobSchedulersResponse> {
|
|
307
|
+
return fm.fetchReq<CreateJobRequest, JobSchedulersResponse>(`/apis/baize.io/v1alpha2/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/jobs/${req["name"]}/schedulers`, {...initReq, method: "PUT", body: JSON.stringify(req, fm.replacer)})
|
|
308
|
+
}
|
|
309
|
+
static ListRdmas(req: ListRdmasRequest, initReq?: fm.InitReq): Promise<ListRdmasResponse> {
|
|
310
|
+
return fm.fetchReq<ListRdmasRequest, ListRdmasResponse>(`/apis/baize.io/v1alpha2/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/rdmas?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace"])}`, {...initReq, method: "GET"})
|
|
311
|
+
}
|
|
312
|
+
static ListJobEvents(req: ListJobEventsRequest, initReq?: fm.InitReq): Promise<BaizeManagement_apiEventV1alpha1Event.ListEventsResponse> {
|
|
313
|
+
return fm.fetchReq<ListJobEventsRequest, BaizeManagement_apiEventV1alpha1Event.ListEventsResponse>(`/apis/baize.io/v1alpha2/workspaces/${req["workspace"]}/clusters/${req["cluster"]}/namespaces/${req["namespace"]}/jobs/${req["name"]}/events?${fm.renderURLSearchParams(req, ["workspace", "cluster", "namespace", "name"])}`, {...initReq, method: "GET"})
|
|
194
314
|
}
|
|
195
315
|
}
|