dataiku-sdk 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/packages/types/src/index.d.ts +4 -0
- package/dist/packages/types/src/index.js +1 -0
- package/dist/src/cli.js +398 -41
- package/dist/src/errors.js +12 -0
- package/dist/src/index.d.ts +4 -4
- package/dist/src/resources/connections.d.ts +10 -0
- package/dist/src/resources/connections.js +16 -0
- package/dist/src/resources/datasets.d.ts +18 -0
- package/dist/src/resources/datasets.js +41 -0
- package/dist/src/resources/jobs.d.ts +52 -19
- package/dist/src/resources/jobs.js +121 -33
- package/dist/src/resources/recipes.d.ts +54 -1
- package/dist/src/resources/recipes.js +235 -0
- package/dist/src/resources/sql.js +84 -3
- package/package.json +1 -1
- package/packages/types/dist/index.d.ts +4 -0
- package/packages/types/dist/index.js +1 -0
|
@@ -26,6 +26,66 @@ const RECIPE_DEFINITION_FIELDS = new Set(["params", "inputs", "outputs", "script
|
|
|
26
26
|
/**
 * Collect the keys of `data` that are recipe-definition fields.
 *
 * @param {object} data - Object whose own enumerable keys are inspected.
 * @returns {string[]} Keys that are members of RECIPE_DEFINITION_FIELDS, in key order.
 */
function rootRecipeDefinitionFields(data) {
    const matches = [];
    for (const key of Object.keys(data)) {
        if (RECIPE_DEFINITION_FIELDS.has(key)) {
            matches.push(key);
        }
    }
    return matches;
}
|
|
29
|
+
/**
 * Map a loosely formatted output type to its canonical name.
 *
 * The value is trimmed, upper-cased, and hyphens are turned into underscores
 * before matching, so "dataset", "managed-folder" and "folder" are accepted.
 *
 * @param {unknown} value - Candidate type value.
 * @returns {"DATASET" | "MANAGED_FOLDER" | undefined} Canonical type, or
 *   undefined for non-strings and unrecognized values.
 */
function normalizeRecipeOutputType(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    switch (value.trim().toUpperCase().replace(/-/g, "_")) {
        case "DATASET":
            return "DATASET";
        case "MANAGED_FOLDER":
        case "FOLDER":
            return "MANAGED_FOLDER";
        default:
            return undefined;
    }
}
|
|
39
|
+
/**
 * Flatten the declared outputs of a recipe into a list of unique references.
 *
 * Walks every role under `recipe.outputs`, reads its `items` array, and keeps
 * the first occurrence of each non-empty `ref`. The optional `type` is taken
 * from the item's `type`, `targetType` or `objectType` (first non-nullish one)
 * and is only included when it normalizes to a known output type.
 *
 * @param {object} recipe - Recipe definition; only `outputs` is read.
 * @returns {Array<{ref: string, role: string, type?: string}>} Unique output
 *   references in declaration order.
 */
function recipeOutputItems(recipe) {
    const rolesByName = asRecord(recipe.outputs);
    if (!rolesByName) {
        return [];
    }
    // A Map keeps insertion order, so the first role declaring a ref wins.
    const byRef = new Map();
    for (const role of Object.keys(rolesByName)) {
        const candidates = asRecord(rolesByName[role])?.items;
        if (!Array.isArray(candidates)) {
            continue;
        }
        for (const candidate of candidates) {
            const entry = asRecord(candidate);
            const ref = asString(entry?.ref);
            if (!ref || byRef.has(ref)) {
                continue;
            }
            const descriptor = { ref, role };
            const type = normalizeRecipeOutputType(entry?.type ?? entry?.targetType ?? entry?.objectType);
            if (type) {
                descriptor.type = type;
            }
            byRef.set(ref, descriptor);
        }
    }
    return [...byRef.values()];
}
|
|
68
|
+
/**
 * Flatten the declared inputs of a recipe into a list of unique references.
 *
 * Walks every role under `recipe.inputs`, reads its `items` array, and keeps
 * the first occurrence of each non-empty `ref`.
 *
 * @param {object} recipe - Recipe definition; only `inputs` is read.
 * @returns {Array<{ref: string, role: string}>} Unique input references in
 *   declaration order.
 */
function recipeInputItems(recipe) {
    const rolesByName = asRecord(recipe.inputs);
    if (!rolesByName) {
        return [];
    }
    const knownRefs = new Set();
    const collected = [];
    Object.entries(rolesByName).forEach(([role, roleValue]) => {
        const candidates = asRecord(roleValue)?.items;
        if (!Array.isArray(candidates)) {
            return;
        }
        for (const candidate of candidates) {
            const ref = asString(asRecord(candidate)?.ref);
            if (ref && !knownRefs.has(ref)) {
                knownRefs.add(ref);
                collected.push({ ref, role });
            }
        }
    });
    return collected;
}
|
|
29
89
|
function inferRecipeCodeExtension(recipeType) {
|
|
30
90
|
const normalized = typeof recipeType === "string" ? recipeType.trim().toLowerCase() : "";
|
|
31
91
|
if (!normalized)
|
|
@@ -92,6 +152,181 @@ export class RecipesResource extends BaseResource {
|
|
|
92
152
|
}
|
|
93
153
|
return opts?.includePayload ? { ...result, recipe, } : { recipe, };
|
|
94
154
|
}
|
|
155
|
+
/** Validate declared recipe graph references before running/building. */
|
|
156
|
+
async validateGraph(recipeName, opts) {
|
|
157
|
+
const pk = this.resolveProjectKey(opts?.projectKey);
|
|
158
|
+
const { recipe, } = await this.get(recipeName, { projectKey: pk, });
|
|
159
|
+
const inputItems = recipeInputItems(recipe);
|
|
160
|
+
const outputItems = recipeOutputItems(recipe);
|
|
161
|
+
const [datasets, folders,] = await Promise.all([
|
|
162
|
+
this.client.datasets.list(pk),
|
|
163
|
+
this.client.folders.list(pk),
|
|
164
|
+
]);
|
|
165
|
+
const datasetNames = new Set(datasets.map((dataset) => dataset.name));
|
|
166
|
+
const folderIdByRef = new Map();
|
|
167
|
+
for (const folder of folders) {
|
|
168
|
+
folderIdByRef.set(folder.id, folder.id);
|
|
169
|
+
if (folder.name)
|
|
170
|
+
folderIdByRef.set(folder.name, folder.id);
|
|
171
|
+
}
|
|
172
|
+
const resolveReference = (item, requireExplicitOutputType) => {
|
|
173
|
+
const folderId = folderIdByRef.get(item.ref);
|
|
174
|
+
const isDataset = datasetNames.has(item.ref);
|
|
175
|
+
if (item.type === "DATASET") {
|
|
176
|
+
return { ref: item.ref, role: item.role, type: "DATASET", exists: isDataset, id: item.ref, };
|
|
177
|
+
}
|
|
178
|
+
if (item.type === "MANAGED_FOLDER") {
|
|
179
|
+
return {
|
|
180
|
+
ref: item.ref,
|
|
181
|
+
role: item.role,
|
|
182
|
+
type: "MANAGED_FOLDER",
|
|
183
|
+
exists: folderId !== undefined,
|
|
184
|
+
id: folderId ?? item.ref,
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
if (isDataset && (!folderId || !requireExplicitOutputType)) {
|
|
188
|
+
return { ref: item.ref, role: item.role, type: "DATASET", exists: true, id: item.ref, };
|
|
189
|
+
}
|
|
190
|
+
if (folderId && !isDataset) {
|
|
191
|
+
return { ref: item.ref, role: item.role, type: "MANAGED_FOLDER", exists: true, id: folderId, };
|
|
192
|
+
}
|
|
193
|
+
return { ref: item.ref, role: item.role, exists: false, };
|
|
194
|
+
};
|
|
195
|
+
const inputs = inputItems.map((item) => resolveReference(item, false));
|
|
196
|
+
const outputs = outputItems.map((item) => resolveReference(item, true));
|
|
197
|
+
const ambiguousOutputs = outputItems
|
|
198
|
+
.filter((item) => !item.type && datasetNames.has(item.ref) && folderIdByRef.has(item.ref))
|
|
199
|
+
.map((item) => item.ref);
|
|
200
|
+
const missingInputs = inputs.filter((item) => !item.exists);
|
|
201
|
+
const missingOutputs = outputs.filter((item) => !item.exists);
|
|
202
|
+
const warnings = [];
|
|
203
|
+
if (outputItems.length === 0)
|
|
204
|
+
warnings.push("Recipe has no declared outputs to build.");
|
|
205
|
+
for (const ref of ambiguousOutputs) {
|
|
206
|
+
warnings.push(`Output "${ref}" matches both a dataset and a managed folder; declare an explicit output type.`);
|
|
207
|
+
}
|
|
208
|
+
for (const output of outputs) {
|
|
209
|
+
if (!output.exists) {
|
|
210
|
+
warnings.push(`Declared output "${output.ref}" was not found in project "${pk}".`);
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
for (const input of missingInputs) {
|
|
214
|
+
warnings.push(`Declared input "${input.ref}" was not found in project "${pk}".`);
|
|
215
|
+
}
|
|
216
|
+
return {
|
|
217
|
+
valid: missingInputs.length === 0
|
|
218
|
+
&& missingOutputs.length === 0
|
|
219
|
+
&& ambiguousOutputs.length === 0
|
|
220
|
+
&& outputItems.length > 0,
|
|
221
|
+
recipeName,
|
|
222
|
+
projectKey: pk,
|
|
223
|
+
inputs,
|
|
224
|
+
outputs,
|
|
225
|
+
missingInputs,
|
|
226
|
+
missingOutputs,
|
|
227
|
+
ambiguousOutputs,
|
|
228
|
+
warnings,
|
|
229
|
+
};
|
|
230
|
+
}
|
|
231
|
+
/** Resolve recipe outputs to job-build targets. */
|
|
232
|
+
async resolveRunOutputs(recipeName, opts) {
|
|
233
|
+
const pk = this.resolveProjectKey(opts?.projectKey);
|
|
234
|
+
const { recipe, } = await this.get(recipeName, { projectKey: pk, });
|
|
235
|
+
const outputItems = recipeOutputItems(recipe);
|
|
236
|
+
if (outputItems.length === 0) {
|
|
237
|
+
throw new Error(`Recipe "${recipeName}" has no output items to build.`);
|
|
238
|
+
}
|
|
239
|
+
const [datasets, folders,] = await Promise.all([
|
|
240
|
+
this.client.datasets.list(pk),
|
|
241
|
+
this.client.folders.list(pk),
|
|
242
|
+
]);
|
|
243
|
+
const datasetNames = new Set(datasets.map((dataset) => dataset.name));
|
|
244
|
+
const folderIdByRef = new Map();
|
|
245
|
+
for (const folder of folders) {
|
|
246
|
+
folderIdByRef.set(folder.id, folder.id);
|
|
247
|
+
if (folder.name)
|
|
248
|
+
folderIdByRef.set(folder.name, folder.id);
|
|
249
|
+
}
|
|
250
|
+
return outputItems.map((item) => {
|
|
251
|
+
if (item.type === "DATASET") {
|
|
252
|
+
return {
|
|
253
|
+
ref: item.ref,
|
|
254
|
+
role: item.role,
|
|
255
|
+
id: item.ref,
|
|
256
|
+
type: "DATASET",
|
|
257
|
+
projectKey: pk,
|
|
258
|
+
partition: opts?.partition,
|
|
259
|
+
};
|
|
260
|
+
}
|
|
261
|
+
const folderId = folderIdByRef.get(item.ref);
|
|
262
|
+
if (item.type === "MANAGED_FOLDER") {
|
|
263
|
+
return {
|
|
264
|
+
ref: item.ref,
|
|
265
|
+
role: item.role,
|
|
266
|
+
id: folderId ?? item.ref,
|
|
267
|
+
type: "MANAGED_FOLDER",
|
|
268
|
+
projectKey: pk,
|
|
269
|
+
partition: opts?.partition,
|
|
270
|
+
};
|
|
271
|
+
}
|
|
272
|
+
const isDataset = datasetNames.has(item.ref);
|
|
273
|
+
if (isDataset && folderId) {
|
|
274
|
+
throw new Error(`Recipe "${recipeName}" output "${item.ref}" matches both a dataset and a managed folder. Add an explicit output type to the recipe definition or build the target directly with --target-type.`);
|
|
275
|
+
}
|
|
276
|
+
if (folderId) {
|
|
277
|
+
return {
|
|
278
|
+
ref: item.ref,
|
|
279
|
+
role: item.role,
|
|
280
|
+
id: folderId,
|
|
281
|
+
type: "MANAGED_FOLDER",
|
|
282
|
+
projectKey: pk,
|
|
283
|
+
partition: opts?.partition,
|
|
284
|
+
};
|
|
285
|
+
}
|
|
286
|
+
if (isDataset) {
|
|
287
|
+
return {
|
|
288
|
+
ref: item.ref,
|
|
289
|
+
role: item.role,
|
|
290
|
+
id: item.ref,
|
|
291
|
+
type: "DATASET",
|
|
292
|
+
projectKey: pk,
|
|
293
|
+
partition: opts?.partition,
|
|
294
|
+
};
|
|
295
|
+
}
|
|
296
|
+
throw new Error(`Recipe "${recipeName}" output "${item.ref}" was not found as a dataset or managed folder in project "${pk}".`);
|
|
297
|
+
});
|
|
298
|
+
}
|
|
299
|
+
/** Run a recipe by building its resolved outputs. */
|
|
300
|
+
async run(recipeName, opts) {
|
|
301
|
+
const pk = this.resolveProjectKey(opts?.projectKey);
|
|
302
|
+
const outputs = await this.resolveRunOutputs(recipeName, {
|
|
303
|
+
partition: opts?.partition,
|
|
304
|
+
projectKey: pk,
|
|
305
|
+
});
|
|
306
|
+
const shouldWait = opts?.wait === true
|
|
307
|
+
|| opts?.includeLogs === true
|
|
308
|
+
|| opts?.summary === true
|
|
309
|
+
|| opts?.timeoutMs !== undefined
|
|
310
|
+
|| opts?.pollIntervalMs !== undefined;
|
|
311
|
+
if (shouldWait) {
|
|
312
|
+
const waitResult = await this.client.jobs.buildAndWaitOutputs(outputs, {
|
|
313
|
+
buildMode: opts?.buildMode,
|
|
314
|
+
includeLogs: opts?.includeLogs,
|
|
315
|
+
maxLogLines: opts?.maxLogLines,
|
|
316
|
+
logFilter: opts?.logFilter,
|
|
317
|
+
pollIntervalMs: opts?.pollIntervalMs,
|
|
318
|
+
projectKey: pk,
|
|
319
|
+
timeoutMs: opts?.timeoutMs,
|
|
320
|
+
summary: opts?.summary,
|
|
321
|
+
});
|
|
322
|
+
return { recipeName, outputs, ...waitResult, };
|
|
323
|
+
}
|
|
324
|
+
const started = await this.client.jobs.buildOutputs(outputs, {
|
|
325
|
+
buildMode: opts?.buildMode,
|
|
326
|
+
projectKey: pk,
|
|
327
|
+
});
|
|
328
|
+
return { recipeName, outputs, ...started, };
|
|
329
|
+
}
|
|
95
330
|
/** Create a recipe, with optional output dataset provisioning and join configuration. */
|
|
96
331
|
async create(opts) {
|
|
97
332
|
const pk = this.resolveProjectKey(opts.projectKey);
|
|
@@ -24,6 +24,82 @@ function asString(value) {
|
|
|
24
24
|
const trimmed = value.trim();
|
|
25
25
|
return trimmed.length > 0 ? trimmed : undefined;
|
|
26
26
|
}
|
|
27
|
+
/**
 * Return the first non-blank string found among the named fields.
 *
 * @param {object} record - Object to read from.
 * @param {string[]} fields - Field names, checked in order.
 * @returns {string | undefined} The trimmed value of the first field holding a
 *   non-blank string, or undefined when none qualifies.
 */
function firstStringField(record, fields) {
    for (const name of fields) {
        const candidate = record[name];
        if (typeof candidate !== "string") {
            continue;
        }
        const text = candidate.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return undefined;
}
|
|
35
|
+
/**
 * Heuristically decide whether an error detail looks SQL-related.
 *
 * A detail qualifies when it contains an upper-case error code ending in
 * _ERROR, _NOT_FOUND, _MISMATCH, _DENIED or _EXCEEDED, or when it mentions
 * (case-insensitively) one of a few SQL-flavoured keywords.
 *
 * @param {string} detail - Error text to inspect.
 * @returns {boolean} True when the text looks like a SQL engine error.
 */
function isLikelySqlErrorDetail(detail) {
    const lowered = detail.toLowerCase();
    if (/\b[A-Z_]+(?:_ERROR|_NOT_FOUND|_MISMATCH|_DENIED|_EXCEEDED)\b/.test(detail)) {
        return true;
    }
    const keywords = ["athena", "sql", "query", "column", "table", "line "];
    return keywords.some((keyword) => lowered.includes(keyword));
}
|
|
45
|
+
/**
 * Extract a SQL-looking error detail from an HTTP error body.
 *
 * For a JSON object body, the nested `details`/`error` record is checked
 * first, then the top-level message-like fields. A candidate is only returned
 * when it looks SQL-related. Otherwise the raw text is searched for an
 * upper-case error code followed by its message. As a last resort, the first
 * 500 characters of a SQL-looking body are returned.
 *
 * @param {unknown} body - Raw response body; anything but a string yields undefined.
 * @returns {string | undefined} The extracted detail, or undefined when the
 *   body is empty, not a string, or does not look like a SQL error.
 */
function sqlErrorDetailFromBody(body) {
    // This runs while handling another error, so a missing or non-string body
    // must not raise a TypeError that would mask the original failure.
    if (typeof body !== "string") {
        return undefined;
    }
    const trimmed = body.trim();
    if (!trimmed) {
        return undefined;
    }
    let parsed;
    try {
        parsed = JSON.parse(trimmed);
    }
    catch {
        // Not JSON: fall back to regex extraction from raw DSS/Athena text below.
        parsed = undefined;
    }
    if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
        const nested = asRecord(parsed.details) ?? asRecord(parsed.error);
        const nestedDetail = nested
            ? firstStringField(nested, ["message", "errorMessage", "error", "reason", "cause"])
            : undefined;
        if (nestedDetail && isLikelySqlErrorDetail(nestedDetail)) {
            return nestedDetail;
        }
        const direct = firstStringField(parsed, [
            "message",
            "detailedMessage",
            "errorMessage",
            "error",
            "reason",
            "cause",
        ]);
        if (direct && isLikelySqlErrorDetail(direct)) {
            return direct;
        }
    }
    const match = trimmed.match(/\b[A-Z_]+(?:_ERROR|_NOT_FOUND|_MISMATCH|_DENIED|_EXCEEDED)\b[:\s-]+[^\n\r]+/);
    return match?.[0] ?? (isLikelySqlErrorDetail(trimmed) ? trimmed.slice(0, 500) : undefined);
}
|
|
77
|
+
/**
 * Re-throw an error, enriched with SQL context when it can be extracted.
 *
 * This function never returns. For a DataikuError whose body contains a
 * SQL-looking detail, a new DataikuError is thrown with the same status,
 * status text and retry value, and a body that leads with
 * "SQL query failed: <detail>". A JSON object body gets `message` and
 * `sqlError` fields; any other body gets a prefixed line. The original error
 * is attached as `cause`. Every other error is re-thrown unchanged.
 *
 * @param {unknown} error - The caught error.
 * @returns {never}
 * @throws Always: either the enriched DataikuError or the original error.
 */
function withSqlErrorContext(error) {
    if (!(error instanceof DataikuError)) {
        throw error;
    }
    const detail = sqlErrorDetailFromBody(error.body);
    if (!detail) {
        throw error;
    }
    const prefixedBody = `SQL query failed: ${detail}\n${error.body}`;
    let body;
    try {
        const parsed = JSON.parse(error.body);
        body = parsed && typeof parsed === "object" && !Array.isArray(parsed)
            ? JSON.stringify({
                ...parsed,
                message: `SQL query failed: ${detail}`,
                sqlError: detail,
            })
            : prefixedBody;
    }
    catch {
        // Body is not JSON: keep it verbatim below the summary line.
        body = prefixedBody;
    }
    const wrapped = new DataikuError(error.status, error.statusText, body, error.retry);
    // Keep the original error (and its stack) reachable. Defined the same way
    // the built-in Error `cause` option does it: own and non-enumerable.
    Object.defineProperty(wrapped, "cause", {
        value: error,
        writable: true,
        configurable: true,
        enumerable: false,
    });
    throw wrapped;
}
|
|
27
103
|
function splitDatasetIdentifier(datasetFullName, fallbackProjectKey) {
|
|
28
104
|
const trimmed = datasetFullName.trim();
|
|
29
105
|
const dotIndex = trimmed.indexOf(".");
|
|
@@ -79,7 +155,7 @@ export class SqlResource extends BaseResource {
|
|
|
79
155
|
const { queryId, schema, } = await this.startQuery(opts);
|
|
80
156
|
const rows = await this.streamResults(queryId);
|
|
81
157
|
await this.finishStreaming(queryId);
|
|
82
|
-
return { queryId, schema, rows, };
|
|
158
|
+
return { queryId, schema, columns: schema, rows, };
|
|
83
159
|
}
|
|
84
160
|
async resolveDatasetQueryFallback(opts) {
|
|
85
161
|
const datasetFullName = opts.datasetFullName;
|
|
@@ -119,14 +195,19 @@ export class SqlResource extends BaseResource {
|
|
|
119
195
|
}
|
|
120
196
|
catch (error) {
|
|
121
197
|
if (!isUnsupportedSqlDatasetConnectionError(error))
|
|
122
|
-
|
|
198
|
+
withSqlErrorContext(error);
|
|
123
199
|
const retryOpts = await this.resolveDatasetQueryFallback(queryOpts);
|
|
124
200
|
if (!retryOpts) {
|
|
125
201
|
throw new Error(buildUnsupportedSqlDatasetConnectionMessage(queryOpts.datasetFullName), {
|
|
126
202
|
cause: error,
|
|
127
203
|
});
|
|
128
204
|
}
|
|
129
|
-
|
|
205
|
+
try {
|
|
206
|
+
return await this.executeQuery(retryOpts);
|
|
207
|
+
}
|
|
208
|
+
catch (retryError) {
|
|
209
|
+
withSqlErrorContext(retryError);
|
|
210
|
+
}
|
|
130
211
|
}
|
|
131
212
|
}
|
|
132
213
|
}
|
package/package.json
CHANGED
|
@@ -611,6 +611,10 @@ export declare const SqlQueryResponseSchema: import("@sinclair/typebox").TObject
|
|
|
611
611
|
name: import("@sinclair/typebox").TString;
|
|
612
612
|
type: import("@sinclair/typebox").TString;
|
|
613
613
|
}>>;
|
|
614
|
+
columns: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TArray<import("@sinclair/typebox").TObject<{
|
|
615
|
+
name: import("@sinclair/typebox").TString;
|
|
616
|
+
type: import("@sinclair/typebox").TString;
|
|
617
|
+
}>>>;
|
|
614
618
|
rows: import("@sinclair/typebox").TArray<import("@sinclair/typebox").TArray<import("@sinclair/typebox").TUnknown>>;
|
|
615
619
|
}>;
|
|
616
620
|
export type SqlQueryResponse = Static<typeof SqlQueryResponseSchema>;
|
|
@@ -601,6 +601,7 @@ export const SqlQueryResultSchema = Type.Object({
|
|
|
601
601
|
// Object schema for a SQL query response: the query id, the result column
// descriptors (`schema`), and the result rows as arrays of unknown values.
export const SqlQueryResponseSchema = Type.Object({
|
|
602
602
|
queryId: Type.String(),
|
|
603
603
|
schema: Type.Array(SqlQuerySchemaSchema),
|
|
604
|
+
// Optional; uses the same element schema as `schema`.
// NOTE(review): presumably an alias of `schema` for callers that expect a
// `columns` key; confirm against the SQL resource's return value.
columns: Type.Optional(Type.Array(SqlQuerySchemaSchema)),
|
|
604
605
|
rows: Type.Array(Type.Array(Type.Unknown())),
|
|
605
606
|
});
|
|
606
607
|
// ---------------------------------------------------------------------------
|