dataiku-sdk 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,6 +26,66 @@ const RECIPE_DEFINITION_FIELDS = new Set(["params", "inputs", "outputs", "script
26
26
/**
 * List the keys of `data` that are recipe-definition fields.
 *
 * @param data Object whose own enumerable keys are inspected.
 * @returns The keys that are members of `RECIPE_DEFINITION_FIELDS`, in `Object.keys` order.
 */
function rootRecipeDefinitionFields(data) {
    const definitionKeys = [];
    for (const key of Object.keys(data)) {
        if (RECIPE_DEFINITION_FIELDS.has(key)) {
            definitionKeys.push(key);
        }
    }
    return definitionKeys;
}
29
/**
 * Canonicalise a recipe output type label.
 *
 * The value is trimmed, upper-cased and has hyphens replaced by underscores
 * before it is matched, so `"managed-folder"` and `" Folder "` are both accepted.
 *
 * @param value Candidate type label; anything that is not a string is rejected.
 * @returns `"DATASET"`, `"MANAGED_FOLDER"`, or `undefined` when the label is not recognised.
 */
function normalizeRecipeOutputType(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    switch (value.trim().toUpperCase().replace(/-/g, "_")) {
        case "DATASET":
            return "DATASET";
        case "MANAGED_FOLDER":
        case "FOLDER":
            return "MANAGED_FOLDER";
        default:
            return undefined;
    }
}
39
/**
 * Flatten the output items declared on a recipe into a list.
 *
 * Walks every role under `recipe.outputs`, reads its `items` array and keeps
 * one entry per distinct `ref` (the first occurrence wins). Items without a
 * usable `ref` and roles without an `items` array are ignored.
 *
 * @param recipe Recipe definition object.
 * @returns Entries of the form `{ ref, role }`, plus `type` when the item's
 *          `type`, `targetType` or `objectType` normalises to a known output type.
 */
function recipeOutputItems(recipe) {
    const outputsByRole = asRecord(recipe.outputs);
    if (!outputsByRole) {
        return [];
    }
    const seenRefs = new Set();
    const collected = [];
    for (const [role, roleValue] of Object.entries(outputsByRole)) {
        const entries = asRecord(roleValue)?.items;
        if (!Array.isArray(entries)) {
            continue;
        }
        for (const entry of entries) {
            const record = asRecord(entry);
            const ref = asString(record?.ref);
            if (!ref || seenRefs.has(ref)) {
                continue;
            }
            seenRefs.add(ref);
            // The type may be stored under any of these three keys; the first non-nullish one is used.
            const type = normalizeRecipeOutputType(record?.type ?? record?.targetType ?? record?.objectType);
            collected.push(type ? { ref, role, type } : { ref, role });
        }
    }
    return collected;
}
68
/**
 * Flatten the input items declared on a recipe into a list.
 *
 * Walks every role under `recipe.inputs`, reads its `items` array and keeps
 * one `{ ref, role }` entry per distinct `ref` (the first occurrence wins).
 * Items without a usable `ref` and roles without an `items` array are ignored.
 *
 * @param recipe Recipe definition object.
 * @returns The de-duplicated input references in declaration order.
 */
function recipeInputItems(recipe) {
    const inputsByRole = asRecord(recipe.inputs);
    if (!inputsByRole) {
        return [];
    }
    // Shared across roles so a ref declared under two roles is reported once.
    const seenRefs = new Set();
    return Object.entries(inputsByRole).flatMap(([role, roleValue]) => {
        const entries = asRecord(roleValue)?.items;
        if (!Array.isArray(entries)) {
            return [];
        }
        const fresh = [];
        for (const entry of entries) {
            const ref = asString(asRecord(entry)?.ref);
            if (ref && !seenRefs.has(ref)) {
                seenRefs.add(ref);
                fresh.push({ ref, role });
            }
        }
        return fresh;
    });
}
29
89
  function inferRecipeCodeExtension(recipeType) {
30
90
  const normalized = typeof recipeType === "string" ? recipeType.trim().toLowerCase() : "";
31
91
  if (!normalized)
@@ -92,6 +152,181 @@ export class RecipesResource extends BaseResource {
92
152
  }
93
153
  return opts?.includePayload ? { ...result, recipe, } : { recipe, };
94
154
  }
155
+ /** Validate declared recipe graph references before running/building. */
156
+ async validateGraph(recipeName, opts) {
157
+ const pk = this.resolveProjectKey(opts?.projectKey);
158
+ const { recipe, } = await this.get(recipeName, { projectKey: pk, });
159
+ const inputItems = recipeInputItems(recipe);
160
+ const outputItems = recipeOutputItems(recipe);
161
+ const [datasets, folders,] = await Promise.all([
162
+ this.client.datasets.list(pk),
163
+ this.client.folders.list(pk),
164
+ ]);
165
+ const datasetNames = new Set(datasets.map((dataset) => dataset.name));
166
+ const folderIdByRef = new Map();
167
+ for (const folder of folders) {
168
+ folderIdByRef.set(folder.id, folder.id);
169
+ if (folder.name)
170
+ folderIdByRef.set(folder.name, folder.id);
171
+ }
172
+ const resolveReference = (item, requireExplicitOutputType) => {
173
+ const folderId = folderIdByRef.get(item.ref);
174
+ const isDataset = datasetNames.has(item.ref);
175
+ if (item.type === "DATASET") {
176
+ return { ref: item.ref, role: item.role, type: "DATASET", exists: isDataset, id: item.ref, };
177
+ }
178
+ if (item.type === "MANAGED_FOLDER") {
179
+ return {
180
+ ref: item.ref,
181
+ role: item.role,
182
+ type: "MANAGED_FOLDER",
183
+ exists: folderId !== undefined,
184
+ id: folderId ?? item.ref,
185
+ };
186
+ }
187
+ if (isDataset && (!folderId || !requireExplicitOutputType)) {
188
+ return { ref: item.ref, role: item.role, type: "DATASET", exists: true, id: item.ref, };
189
+ }
190
+ if (folderId && !isDataset) {
191
+ return { ref: item.ref, role: item.role, type: "MANAGED_FOLDER", exists: true, id: folderId, };
192
+ }
193
+ return { ref: item.ref, role: item.role, exists: false, };
194
+ };
195
+ const inputs = inputItems.map((item) => resolveReference(item, false));
196
+ const outputs = outputItems.map((item) => resolveReference(item, true));
197
+ const ambiguousOutputs = outputItems
198
+ .filter((item) => !item.type && datasetNames.has(item.ref) && folderIdByRef.has(item.ref))
199
+ .map((item) => item.ref);
200
+ const missingInputs = inputs.filter((item) => !item.exists);
201
+ const missingOutputs = outputs.filter((item) => !item.exists);
202
+ const warnings = [];
203
+ if (outputItems.length === 0)
204
+ warnings.push("Recipe has no declared outputs to build.");
205
+ for (const ref of ambiguousOutputs) {
206
+ warnings.push(`Output "${ref}" matches both a dataset and a managed folder; declare an explicit output type.`);
207
+ }
208
+ for (const output of outputs) {
209
+ if (!output.exists) {
210
+ warnings.push(`Declared output "${output.ref}" was not found in project "${pk}".`);
211
+ }
212
+ }
213
+ for (const input of missingInputs) {
214
+ warnings.push(`Declared input "${input.ref}" was not found in project "${pk}".`);
215
+ }
216
+ return {
217
+ valid: missingInputs.length === 0
218
+ && missingOutputs.length === 0
219
+ && ambiguousOutputs.length === 0
220
+ && outputItems.length > 0,
221
+ recipeName,
222
+ projectKey: pk,
223
+ inputs,
224
+ outputs,
225
+ missingInputs,
226
+ missingOutputs,
227
+ ambiguousOutputs,
228
+ warnings,
229
+ };
230
+ }
231
+ /** Resolve recipe outputs to job-build targets. */
232
+ async resolveRunOutputs(recipeName, opts) {
233
+ const pk = this.resolveProjectKey(opts?.projectKey);
234
+ const { recipe, } = await this.get(recipeName, { projectKey: pk, });
235
+ const outputItems = recipeOutputItems(recipe);
236
+ if (outputItems.length === 0) {
237
+ throw new Error(`Recipe "${recipeName}" has no output items to build.`);
238
+ }
239
+ const [datasets, folders,] = await Promise.all([
240
+ this.client.datasets.list(pk),
241
+ this.client.folders.list(pk),
242
+ ]);
243
+ const datasetNames = new Set(datasets.map((dataset) => dataset.name));
244
+ const folderIdByRef = new Map();
245
+ for (const folder of folders) {
246
+ folderIdByRef.set(folder.id, folder.id);
247
+ if (folder.name)
248
+ folderIdByRef.set(folder.name, folder.id);
249
+ }
250
+ return outputItems.map((item) => {
251
+ if (item.type === "DATASET") {
252
+ return {
253
+ ref: item.ref,
254
+ role: item.role,
255
+ id: item.ref,
256
+ type: "DATASET",
257
+ projectKey: pk,
258
+ partition: opts?.partition,
259
+ };
260
+ }
261
+ const folderId = folderIdByRef.get(item.ref);
262
+ if (item.type === "MANAGED_FOLDER") {
263
+ return {
264
+ ref: item.ref,
265
+ role: item.role,
266
+ id: folderId ?? item.ref,
267
+ type: "MANAGED_FOLDER",
268
+ projectKey: pk,
269
+ partition: opts?.partition,
270
+ };
271
+ }
272
+ const isDataset = datasetNames.has(item.ref);
273
+ if (isDataset && folderId) {
274
+ throw new Error(`Recipe "${recipeName}" output "${item.ref}" matches both a dataset and a managed folder. Add an explicit output type to the recipe definition or build the target directly with --target-type.`);
275
+ }
276
+ if (folderId) {
277
+ return {
278
+ ref: item.ref,
279
+ role: item.role,
280
+ id: folderId,
281
+ type: "MANAGED_FOLDER",
282
+ projectKey: pk,
283
+ partition: opts?.partition,
284
+ };
285
+ }
286
+ if (isDataset) {
287
+ return {
288
+ ref: item.ref,
289
+ role: item.role,
290
+ id: item.ref,
291
+ type: "DATASET",
292
+ projectKey: pk,
293
+ partition: opts?.partition,
294
+ };
295
+ }
296
+ throw new Error(`Recipe "${recipeName}" output "${item.ref}" was not found as a dataset or managed folder in project "${pk}".`);
297
+ });
298
+ }
299
+ /** Run a recipe by building its resolved outputs. */
300
+ async run(recipeName, opts) {
301
+ const pk = this.resolveProjectKey(opts?.projectKey);
302
+ const outputs = await this.resolveRunOutputs(recipeName, {
303
+ partition: opts?.partition,
304
+ projectKey: pk,
305
+ });
306
+ const shouldWait = opts?.wait === true
307
+ || opts?.includeLogs === true
308
+ || opts?.summary === true
309
+ || opts?.timeoutMs !== undefined
310
+ || opts?.pollIntervalMs !== undefined;
311
+ if (shouldWait) {
312
+ const waitResult = await this.client.jobs.buildAndWaitOutputs(outputs, {
313
+ buildMode: opts?.buildMode,
314
+ includeLogs: opts?.includeLogs,
315
+ maxLogLines: opts?.maxLogLines,
316
+ logFilter: opts?.logFilter,
317
+ pollIntervalMs: opts?.pollIntervalMs,
318
+ projectKey: pk,
319
+ timeoutMs: opts?.timeoutMs,
320
+ summary: opts?.summary,
321
+ });
322
+ return { recipeName, outputs, ...waitResult, };
323
+ }
324
+ const started = await this.client.jobs.buildOutputs(outputs, {
325
+ buildMode: opts?.buildMode,
326
+ projectKey: pk,
327
+ });
328
+ return { recipeName, outputs, ...started, };
329
+ }
95
330
  /** Create a recipe, with optional output dataset provisioning and join configuration. */
96
331
  async create(opts) {
97
332
  const pk = this.resolveProjectKey(opts.projectKey);
@@ -24,6 +24,82 @@ function asString(value) {
24
24
  const trimmed = value.trim();
25
25
  return trimmed.length > 0 ? trimmed : undefined;
26
26
  }
27
/**
 * Return the first non-blank string found under the given field names.
 *
 * @param record Object to read from.
 * @param fields Field names to try, in priority order.
 * @returns The trimmed value of the first field holding a non-blank string,
 *          or `undefined` when no field qualifies.
 */
function firstStringField(record, fields) {
    for (const name of fields) {
        const candidate = record[name];
        if (typeof candidate !== "string") {
            continue;
        }
        const text = candidate.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return undefined;
}
35
/**
 * Heuristically decide whether an error message describes a SQL failure.
 *
 * A message qualifies when it contains an upper-case error code ending in
 * `_ERROR`, `_NOT_FOUND`, `_MISMATCH`, `_DENIED` or `_EXCEEDED`, or when it
 * mentions (case-insensitively) one of a few SQL-related keywords.
 *
 * @param detail Error message text.
 * @returns `true` when the text looks SQL-related, `false` otherwise.
 */
function isLikelySqlErrorDetail(detail) {
    const haystack = detail.toLowerCase();
    if (/\b[A-Z_]+(?:_ERROR|_NOT_FOUND|_MISMATCH|_DENIED|_EXCEEDED)\b/.test(detail)) {
        return true;
    }
    // "line " keeps its trailing space on purpose: it targets "line 3:14"-style positions.
    const hints = ["athena", "sql", "query", "column", "table", "line "];
    return hints.some((hint) => haystack.includes(hint));
}
45
/**
 * Extract a SQL error description from an error response body.
 *
 * Strategy, in order:
 * 1. If the body is a JSON object, look at a nested `details` (or else `error`)
 *    record, then at the top-level fields, and return the first message that
 *    looks SQL-related.
 * 2. Otherwise, or if nothing matched, search the raw text for an upper-case
 *    error code followed by the rest of its line.
 * 3. As a last resort, return the first 500 characters of the body when the
 *    body as a whole looks SQL-related.
 *
 * @param body Raw response body text.
 * @returns The extracted detail, or `undefined` when the body is blank or
 *          nothing SQL-related is found.
 */
function sqlErrorDetailFromBody(body) {
    const text = body.trim();
    if (!text) {
        return undefined;
    }
    // Keeps a candidate only when it is present and looks like a SQL error.
    const keepIfSql = (candidate) => (candidate && isLikelySqlErrorDetail(candidate) ? candidate : undefined);
    try {
        const parsed = JSON.parse(text);
        if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
            const nested = asRecord(parsed.details) ?? asRecord(parsed.error);
            const fromNested = nested
                ? keepIfSql(firstStringField(nested, ["message", "errorMessage", "error", "reason", "cause"]))
                : undefined;
            if (fromNested !== undefined) {
                return fromNested;
            }
            const fromTopLevel = keepIfSql(firstStringField(parsed, [
                "message",
                "detailedMessage",
                "errorMessage",
                "error",
                "reason",
                "cause",
            ]));
            if (fromTopLevel !== undefined) {
                return fromTopLevel;
            }
        }
    }
    catch {
        // Fall back to regex extraction from raw DSS/Athena text below.
    }
    const coded = text.match(/\b[A-Z_]+(?:_ERROR|_NOT_FOUND|_MISMATCH|_DENIED|_EXCEEDED)\b[:\s-]+[^\n\r]+/);
    if (coded) {
        return coded[0];
    }
    return isLikelySqlErrorDetail(text) ? text.slice(0, 500) : undefined;
}
77
/**
 * Rethrow an error, enriching it with SQL context when possible.
 *
 * This function never returns normally. When `error` is a `DataikuError`
 * whose body yields a SQL error detail, a new `DataikuError` is thrown with
 * the same status, status text and retry value and a rewritten body:
 * - JSON-object bodies get `message` replaced and a `sqlError` field added;
 * - any other body is prefixed with a `SQL query failed: …` line.
 * In every other case the original error is rethrown unchanged.
 *
 * @param error The caught error.
 * @throws Always: either the enriched `DataikuError` or `error` itself.
 */
function withSqlErrorContext(error) {
    if (!(error instanceof DataikuError)) {
        throw error;
    }
    const detail = sqlErrorDetailFromBody(error.body);
    if (!detail) {
        throw error;
    }
    const prefixedBody = `SQL query failed: ${detail}\n${error.body}`;
    let body;
    try {
        const parsed = JSON.parse(error.body);
        const isJsonObject = parsed && typeof parsed === "object" && !Array.isArray(parsed);
        body = isJsonObject
            ? JSON.stringify({
                ...parsed,
                message: `SQL query failed: ${detail}`,
                sqlError: detail,
            })
            : prefixedBody;
    }
    catch {
        // Body is not JSON: keep it verbatim underneath the summary line.
        body = prefixedBody;
    }
    throw new DataikuError(error.status, error.statusText, body, error.retry);
}
27
103
  function splitDatasetIdentifier(datasetFullName, fallbackProjectKey) {
28
104
  const trimmed = datasetFullName.trim();
29
105
  const dotIndex = trimmed.indexOf(".");
@@ -79,7 +155,7 @@ export class SqlResource extends BaseResource {
79
155
  const { queryId, schema, } = await this.startQuery(opts);
80
156
  const rows = await this.streamResults(queryId);
81
157
  await this.finishStreaming(queryId);
82
- return { queryId, schema, rows, };
158
+ return { queryId, schema, columns: schema, rows, };
83
159
  }
84
160
  async resolveDatasetQueryFallback(opts) {
85
161
  const datasetFullName = opts.datasetFullName;
@@ -119,14 +195,19 @@ export class SqlResource extends BaseResource {
119
195
  }
120
196
  catch (error) {
121
197
  if (!isUnsupportedSqlDatasetConnectionError(error))
122
- throw error;
198
+ withSqlErrorContext(error);
123
199
  const retryOpts = await this.resolveDatasetQueryFallback(queryOpts);
124
200
  if (!retryOpts) {
125
201
  throw new Error(buildUnsupportedSqlDatasetConnectionMessage(queryOpts.datasetFullName), {
126
202
  cause: error,
127
203
  });
128
204
  }
129
- return this.executeQuery(retryOpts);
205
+ try {
206
+ return await this.executeQuery(retryOpts);
207
+ }
208
+ catch (retryError) {
209
+ withSqlErrorContext(retryError);
210
+ }
130
211
  }
131
212
  }
132
213
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "dataiku-sdk",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "description": "Dataiku DSS SDK and CLI for programmatic access to DSS REST APIs",
5
5
  "type": "module",
6
6
  "workspaces": [
@@ -611,6 +611,10 @@ export declare const SqlQueryResponseSchema: import("@sinclair/typebox").TObject
611
611
  name: import("@sinclair/typebox").TString;
612
612
  type: import("@sinclair/typebox").TString;
613
613
  }>>;
614
+ columns: import("@sinclair/typebox").TOptional<import("@sinclair/typebox").TArray<import("@sinclair/typebox").TObject<{
615
+ name: import("@sinclair/typebox").TString;
616
+ type: import("@sinclair/typebox").TString;
617
+ }>>>;
614
618
  rows: import("@sinclair/typebox").TArray<import("@sinclair/typebox").TArray<import("@sinclair/typebox").TUnknown>>;
615
619
  }>;
616
620
  export type SqlQueryResponse = Static<typeof SqlQueryResponseSchema>;
@@ -601,6 +601,7 @@ export const SqlQueryResultSchema = Type.Object({
601
601
  /**
   * TypeBox schema for the result object of a SQL query.
   *
   * - `queryId`: identifier of the query (string).
   * - `schema`: array of column descriptors (`SqlQuerySchemaSchema`).
   * - `columns`: optional array with the same element schema as `schema`; in this
   *   diff the SQL resource fills it with the same array it returns as `schema`.
   * - `rows`: array of rows, each row being an array of untyped cell values.
   */
  export const SqlQueryResponseSchema = Type.Object({
602
602
  queryId: Type.String(),
603
603
  schema: Type.Array(SqlQuerySchemaSchema),
604
+ columns: Type.Optional(Type.Array(SqlQuerySchemaSchema)),
604
605
  rows: Type.Array(Type.Array(Type.Unknown())),
605
606
  });
606
607
  // ---------------------------------------------------------------------------