dataiku-sdk 0.5.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,6 +71,18 @@ export function classifyDataikuError(status, body) {
71
71
  retryHint: "Requested object was not found. Verify projectKey and object identifiers before retrying.",
72
72
  };
73
73
  }
74
+ const isSqlEngineValidation = status >= 400
75
+ && (lowerBody.includes("column_not_found")
76
+ || lowerBody.includes("table_not_found")
77
+ || lowerBody.includes("no_such_table")
78
+ || lowerBody.includes("column does not exist"));
79
+ if (isSqlEngineValidation) {
80
+ return {
81
+ category: "validation",
82
+ retryable: false,
83
+ retryHint: "Athena/SQL engine rejected the query: check column names, table names, and schema with dss dataset schema or dss connection tables. Do not retry unchanged SQL.",
84
+ };
85
+ }
74
86
  const isServerValidationLike = status >= 500
75
87
  && (lowerBody.includes("invalid")
76
88
  || lowerBody.includes("validation")
@@ -3,19 +3,19 @@ export { type CredentialValidationOptions, type CredentialValidationResult, vali
3
3
  export { deleteCredentials, type DssCredentials, getConfigDir, getCredentialsPath, loadCredentials, maskApiKey, saveCredentials, } from "./config.js";
4
4
  export { DataikuError, type DataikuErrorCategory, type DataikuErrorTaxonomy, type DataikuRetryMetadata, } from "./errors.js";
5
5
  export { CodeEnvsResource, } from "./resources/code-envs.js";
6
- export { ConnectionsResource, } from "./resources/connections.js";
6
+ export { type ConnectionSchemaListOptions, ConnectionsResource, type ConnectionTableListOptions, } from "./resources/connections.js";
7
7
  export { DashboardsResource, } from "./resources/dashboards.js";
8
8
  export { DataQualityResource, } from "./resources/data-quality.js";
9
- export { DatasetsResource, } from "./resources/datasets.js";
9
+ export { type DatasetBuildValidationResult, type DatasetCloneOptions, type DatasetCloneResult, type DatasetSchemaColumnInput, DatasetsResource, } from "./resources/datasets.js";
10
10
  export { type FlowZoneItemInput, FlowZonesResource, } from "./resources/flow-zones.js";
11
11
  export { FoldersResource, } from "./resources/folders.js";
12
12
  export { FuturesResource, } from "./resources/futures.js";
13
13
  export { InsightsResource, } from "./resources/insights.js";
14
- export { computeNextPollDelayMs, type JobBuildTargetType, JobsResource, } from "./resources/jobs.js";
14
+ export { computeNextPollDelayMs, type JobBuildAndWaitOptions, type JobBuildOptions, type JobBuildTarget, type JobBuildTargetType, type JobLogFilter, type JobLogProgress, type JobLogSummary, JobsResource, parseJobLogProgress, } from "./resources/jobs.js";
15
15
  export { NotebooksResource, } from "./resources/notebooks.js";
16
16
  export { type FlowMapResult, ProjectsResource, } from "./resources/projects.js";
17
- export { RecipesResource, } from "./resources/recipes.js";
18
- export { ScenariosResource, } from "./resources/scenarios.js";
17
+ export { type RecipeCloneOptions, type RecipeCloneResult, type RecipeGraphReference, type RecipeGraphValidationResult, type RecipeRunOptions, type RecipeRunOutput, type RecipeRunResult, RecipesResource, } from "./resources/recipes.js";
18
+ export { normalizeScenarioUpdateData, SCENARIO_CANONICAL_EDITABLE_FIELDS, type ScenarioFieldChange, type ScenarioFieldMismatch, ScenariosResource, type ScenarioUpdateNormalization, type ScenarioUpdatePreview, scenarioUpdatePreview, type ScenarioUpdateResult, } from "./resources/scenarios.js";
19
19
  export { SqlResource, } from "./resources/sql.js";
20
20
  export { VariablesResource, } from "./resources/variables.js";
21
21
  export { WikiResource, } from "./resources/wiki.js";
package/dist/src/index.js CHANGED
@@ -15,11 +15,11 @@ export { FlowZonesResource, } from "./resources/flow-zones.js";
15
15
  export { FoldersResource, } from "./resources/folders.js";
16
16
  export { FuturesResource, } from "./resources/futures.js";
17
17
  export { InsightsResource, } from "./resources/insights.js";
18
- export { computeNextPollDelayMs, JobsResource, } from "./resources/jobs.js";
18
+ export { computeNextPollDelayMs, JobsResource, parseJobLogProgress, } from "./resources/jobs.js";
19
19
  export { NotebooksResource, } from "./resources/notebooks.js";
20
20
  export { ProjectsResource, } from "./resources/projects.js";
21
21
  export { RecipesResource, } from "./resources/recipes.js";
22
- export { ScenariosResource, } from "./resources/scenarios.js";
22
+ export { normalizeScenarioUpdateData, SCENARIO_CANONICAL_EDITABLE_FIELDS, ScenariosResource, scenarioUpdatePreview, } from "./resources/scenarios.js";
23
23
  export { SqlResource, } from "./resources/sql.js";
24
24
  export { VariablesResource, } from "./resources/variables.js";
25
25
  export { WikiResource, } from "./resources/wiki.js";
@@ -1,5 +1,13 @@
1
1
  import type { ConnectionSummary } from "../schemas.js";
2
2
  import { BaseResource } from "./base.js";
3
+ export interface ConnectionSchemaListOptions {
4
+ connection: string;
5
+ projectKey?: string;
6
+ }
7
+ export interface ConnectionTableListOptions extends ConnectionSchemaListOptions {
8
+ catalog?: string;
9
+ schema?: string;
10
+ }
3
11
  export declare class ConnectionsResource extends BaseResource {
4
12
  /**
5
13
  * Returns sorted list of all connection names visible to the current user.
@@ -19,4 +27,6 @@ export declare class ConnectionsResource extends BaseResource {
19
27
  mode?: "fast" | "rich";
20
28
  projectKey?: string;
21
29
  }): Promise<ConnectionSummary[]>;
30
+ schemas(opts: ConnectionSchemaListOptions): Promise<string[]>;
31
+ tables(opts: ConnectionTableListOptions): Promise<Record<string, unknown>>;
22
32
  }
@@ -79,4 +79,20 @@ export class ConnectionsResource extends BaseResource {
79
79
  }
80
80
  return inferRichConnectionsFromDatasets(this.client, projectEnc);
81
81
  }
82
/**
 * Query the project's `tables-import/actions/list-schemas` endpoint for a connection.
 *
 * @param opts - `connection` is sent as the `connectionName` query parameter;
 *   `projectKey` is handed to `resolveProjectKey` to choose the project in the URL.
 * @returns The value `client.get` resolves with for that endpoint.
 */
async schemas(opts) {
    const resolvedKey = this.resolveProjectKey(opts.projectKey);
    const query = new URLSearchParams({ connectionName: opts.connection });
    const projectSegment = encodeURIComponent(resolvedKey);
    return this.client.get(`/public/api/projects/${projectSegment}/datasets/tables-import/actions/list-schemas?${query.toString()}`);
}
88
/**
 * Query the project's `tables-import/actions/list-tables` endpoint for a connection.
 *
 * @param opts - `connection` is always sent as `connectionName`; `catalog` and
 *   `schema` are sent as `catalogName` / `schemaName` only when they are not
 *   `undefined`. `projectKey` is handed to `resolveProjectKey`.
 * @returns The value `client.get` resolves with for that endpoint.
 */
async tables(opts) {
    const resolvedKey = this.resolveProjectKey(opts.projectKey);
    const query = new URLSearchParams({ connectionName: opts.connection });
    // Optional filters, in the order they appear in the query string.
    const optionalFilters = [
        ["catalogName", opts.catalog],
        ["schemaName", opts.schema],
    ];
    for (const [paramName, paramValue] of optionalFilters) {
        if (paramValue !== undefined) {
            query.set(paramName, paramValue);
        }
    }
    const projectSegment = encodeURIComponent(resolvedKey);
    return this.client.get(`/public/api/projects/${projectSegment}/datasets/tables-import/actions/list-tables?${query.toString()}`);
}
82
98
  }
@@ -1,5 +1,34 @@
1
1
  import { BaseResource } from "./base.js";
2
2
  import type { DatasetCreateOptions, DatasetDetails, DatasetSchema, DatasetSummary } from "../schemas.js";
3
+ export interface DatasetBuildValidationResult {
4
+ valid: boolean;
5
+ datasetName: string;
6
+ projectKey: string;
7
+ type: string | null;
8
+ path: string | null;
9
+ formatType: string | null;
10
+ warnings: string[];
11
+ }
12
+ export interface DatasetSchemaColumnInput {
13
+ name: string;
14
+ type: string;
15
+ comment?: string;
16
+ }
17
+ export interface DatasetCloneOptions {
18
+ projectKey?: string;
19
+ path?: string;
20
+ table?: string;
21
+ metastoreTableName?: string;
22
+ overrides?: Record<string, unknown>;
23
+ allowSamePath?: boolean;
24
+ }
25
+ export interface DatasetCloneResult {
26
+ source: string;
27
+ target: string;
28
+ projectKey: string;
29
+ created: Record<string, unknown>;
30
+ settings: Record<string, unknown>;
31
+ }
3
32
  /**
4
33
  * Compare streamed TSV header columns against a known dataset schema.
5
34
  * Returns an array of warning strings (empty if all columns match).
@@ -7,6 +36,7 @@ import type { DatasetCreateOptions, DatasetDetails, DatasetSchema, DatasetSummar
7
36
  export declare function validateStreamColumns(headerRow: string[], expectedColumns: {
8
37
  name: string;
9
38
  }[]): string[];
39
+ export declare function buildDatasetCloneSettings(source: DatasetDetails, targetName: string, projectKey: string, opts: DatasetCloneOptions): Record<string, unknown>;
10
40
  export declare class DatasetsResource extends BaseResource {
11
41
  /** List all datasets in a project. */
12
42
  list(projectKey?: string): Promise<DatasetSummary[]>;
@@ -14,6 +44,8 @@ export declare class DatasetsResource extends BaseResource {
14
44
  get(datasetName: string, projectKey?: string): Promise<DatasetDetails>;
15
45
  /** Get dataset schema (column names and types). */
16
46
  schema(datasetName: string, projectKey?: string): Promise<DatasetSchema>;
47
+ /** Replace dataset schema columns directly through the schema endpoint. */
48
+ updateSchema(datasetName: string, columns: DatasetSchemaColumnInput[], projectKey?: string): Promise<void>;
17
49
  /**
18
50
  * Preview dataset data as CSV text.
19
51
  * Streams TSV from the API, converts to CSV, and returns up to `maxRows`
@@ -51,6 +83,10 @@ export declare class DatasetsResource extends BaseResource {
51
83
  * from existing datasets on the same connection.
52
84
  */
53
85
  create(opts: DatasetCreateOptions): Promise<Record<string, unknown>>;
86
+ /** Validate common build blockers before running a dataset build. */
87
+ validateBuildSettings(datasetName: string, projectKey?: string): Promise<DatasetBuildValidationResult>;
88
+ /** Clone dataset settings, preserving connection/storage, format, and schema fields. */
89
+ clone(sourceName: string, targetName: string, opts?: DatasetCloneOptions): Promise<DatasetCloneResult>;
54
90
  /** Update a dataset by deep-merging a patch into the current definition. */
55
91
  update(datasetName: string, data: Record<string, unknown>, projectKey?: string): Promise<void>;
56
92
  /** Delete a dataset. */
@@ -306,6 +306,38 @@ function buildDatasetCreateBody(opts) {
306
306
  managed: opts.managed ?? true,
307
307
  };
308
308
  }
309
/**
 * Assemble the settings object for a dataset that clones `source` under `targetName`.
 *
 * The result carries `name` and `projectKey`, then copies `type`, `managed`,
 * `formatType`, `formatParams` and `schema` from `source` when they are defined.
 * `params` starts as a copy of `source.params`, with `opts.path`, `opts.table`
 * (which also sets `mode: "table"`) and `opts.metastoreTableName` applied on top;
 * it is omitted when it ends up empty. `opts.overrides`, when truthy, is
 * deep-merged over the whole object last.
 *
 * @param source - Details of the dataset being cloned.
 * @param targetName - Name given to the clone.
 * @param projectKey - Project key written into the clone settings.
 * @param opts - Clone options (`path`, `table`, `metastoreTableName`, `overrides`, `allowSamePath`).
 * @returns The settings object for the clone.
 * @throws {Error} When `source.managed === true`, `opts.allowSamePath` is not `true`,
 *   and the resulting `params.path` equals the source's string `params.path`.
 */
export function buildDatasetCloneSettings(source, targetName, projectKey, opts) {
    // Storage params: copy the source's, then apply the explicit location options.
    const storageParams = { ...source.params };
    if (opts.path !== undefined) {
        storageParams.path = opts.path;
    }
    if (opts.table !== undefined) {
        storageParams.table = opts.table;
        storageParams.mode = "table";
    }
    if (opts.metastoreTableName !== undefined) {
        storageParams.metastoreTableName = opts.metastoreTableName;
    }

    // Top-level fields are added in a fixed order; undefined source fields are left out.
    const draft = { name: targetName, projectKey };
    const put = (field, value) => {
        draft[field] = value;
    };
    const copyIfDefined = (field) => {
        if (source[field] !== undefined) {
            put(field, source[field]);
        }
    };
    copyIfDefined("type");
    copyIfDefined("managed");
    if (Object.keys(storageParams).length > 0) {
        put("params", storageParams);
    }
    copyIfDefined("formatType");
    copyIfDefined("formatParams");
    copyIfDefined("schema");

    const settings = opts.overrides ? deepMerge(draft, opts.overrides) : draft;

    // The same-path guard only concerns managed sources and can be waived explicitly.
    if (opts.allowSamePath === true || source.managed !== true) {
        return settings;
    }
    const originalPath = typeof source.params?.path === "string" ? source.params.path : undefined;
    if (originalPath === undefined) {
        return settings;
    }
    const candidate = settings.params;
    const mergedParams = candidate && typeof candidate === "object" && !Array.isArray(candidate)
        ? candidate
        : {};
    if (mergedParams.path === originalPath) {
        throw new Error(`Refusing to clone managed dataset "${source.name}" with the same storage path. Pass a new path or allowSamePath: true.`);
    }
    return settings;
}
309
341
  // ---------------------------------------------------------------------------
310
342
  // Resource
311
343
  // ---------------------------------------------------------------------------
@@ -327,6 +359,11 @@ export class DatasetsResource extends BaseResource {
327
359
  const raw = await this.client.get(`/public/api/projects/${this.enc(projectKey)}/datasets/${dsEnc}/schema`);
328
360
  return this.client.safeParse(DatasetSchemaSchema, raw, "datasets.schema");
329
361
  }
362
/**
 * Replace dataset schema columns directly through the schema endpoint.
 *
 * Sends a PUT to `/public/api/projects/{project}/datasets/{dataset}/schema`
 * with `{ columns }` as the body and resolves once that request completes.
 *
 * @param datasetName - Dataset name; URI-encoded before being placed in the URL.
 * @param columns - Column definitions sent as the `columns` property of the body.
 * @param projectKey - Passed to `this.enc` to produce the project URL segment.
 */
async updateSchema(datasetName, columns, projectKey) {
    const datasetSegment = encodeURIComponent(datasetName);
    const endpoint = ["/public/api/projects", this.enc(projectKey), "datasets", datasetSegment, "schema"].join("/");
    const body = { columns };
    await this.client.put(endpoint, body);
}
330
367
  /**
331
368
  * Preview dataset data as CSV text.
332
369
  * Streams TSV from the API, converts to CSV, and returns up to `maxRows`
@@ -444,6 +481,49 @@ export class DatasetsResource extends BaseResource {
444
481
  return this.client.post(`/public/api/projects/${enc}/datasets/`, body);
445
482
  }
446
483
  }
484
/**
 * Validate common build blockers before running a dataset build.
 *
 * Loads the dataset via `this.get` and inspects `type`, `formatType`,
 * `params.path` and `params.table`. A dataset counts as file-backed when it has
 * no non-blank `params.table` and either its lower-cased type contains
 * "filesystem", "uploaded" or "s3", or it has a non-blank `params.path`.
 * File-backed datasets get a warning for a missing path and another for a
 * missing `formatType`.
 *
 * @param datasetName - Dataset to inspect.
 * @param projectKey - Passed to `resolveProjectKey`; the resolved key is echoed in the result.
 * @returns A result whose `valid` flag is true exactly when `warnings` is empty.
 */
async validateBuildSettings(datasetName, projectKey) {
    const resolvedKey = this.resolveProjectKey(projectKey);
    const dataset = await this.get(datasetName, resolvedKey);
    const storage = dataset.params ?? {};
    // Blank or non-string values are reported as null.
    const nonBlankOrNull = (value) => (typeof value === "string" && value.trim().length > 0 ? value : null);

    const type = dataset.type ?? null;
    const path = nonBlankOrNull(storage.path);
    const table = nonBlankOrNull(storage.table);
    const typeLabel = (type ?? "").toLowerCase();
    const looksFileBased = ["filesystem", "uploaded", "s3"].some((marker) => typeLabel.includes(marker))
        || path !== null;
    const fileBacked = !table && looksFileBased;
    const formatType = dataset.formatType ?? null;

    const warnings = [];
    if (fileBacked) {
        if (!path) {
            warnings.push("File-backed dataset has no writable storage path configured.");
        }
        if (!formatType) {
            warnings.push("File-backed dataset has no formatType configured.");
        }
    }
    const valid = warnings.length === 0;
    return { valid, datasetName, projectKey: resolvedKey, type, path, formatType, warnings };
}
520
/**
 * Clone dataset settings, preserving connection/storage, format, and schema fields.
 *
 * Fetches the source dataset, derives the clone settings with
 * `buildDatasetCloneSettings`, and POSTs them to the project's `datasets/` endpoint.
 *
 * @param sourceName - Dataset to copy settings from.
 * @param targetName - Name of the dataset to create.
 * @param opts - Clone options; `projectKey` is passed to `resolveProjectKey`.
 * @returns The source and target names, the resolved project key, the POST
 *   response (`created`) and the settings that were sent.
 */
async clone(sourceName, targetName, opts = {}) {
    const resolvedKey = this.resolveProjectKey(opts.projectKey);
    const sourceDetails = await this.get(sourceName, resolvedKey);
    const settings = buildDatasetCloneSettings(sourceDetails, targetName, resolvedKey, opts);
    const endpoint = `/public/api/projects/${encodeURIComponent(resolvedKey)}/datasets/`;
    const created = await this.client.post(endpoint, settings);
    return {
        source: sourceName,
        target: targetName,
        projectKey: resolvedKey,
        created,
        settings,
    };
}
447
527
  /** Update a dataset by deep-merging a patch into the current definition. */
448
528
  async update(datasetName, data, projectKey) {
449
529
  const dsEnc = encodeURIComponent(datasetName);
@@ -1,6 +1,42 @@
1
1
  import type { BuildMode, JobSummary, JobWaitResult } from "../schemas.js";
2
2
  import { BaseResource } from "./base.js";
3
3
  export type JobBuildTargetType = "DATASET" | "MANAGED_FOLDER";
4
+ export type JobLogFilter = "stdout" | "stderr" | "user" | "errors";
5
+ export interface JobLogProgress {
6
+ lastProgressLine?: string;
7
+ doneLine?: string;
8
+ counters: Record<string, number>;
9
+ rowsPerMinute?: number;
10
+ }
11
+ export interface JobLogSummary {
12
+ state: string;
13
+ lineCount: number;
14
+ lines: string[];
15
+ progress?: JobLogProgress;
16
+ }
17
+ export interface JobBuildTarget {
18
+ id: string;
19
+ type?: JobBuildTargetType;
20
+ projectKey?: string;
21
+ partition?: string;
22
+ }
23
+ export interface JobBuildOptions {
24
+ buildMode?: BuildMode;
25
+ autoUpdateSchema?: boolean;
26
+ projectKey?: string;
27
+ targetType?: JobBuildTargetType;
28
+ partition?: string;
29
+ }
30
+ export interface JobBuildAndWaitOptions extends JobBuildOptions {
31
+ activity?: string;
32
+ includeLogs?: boolean;
33
+ maxLogLines?: number;
34
+ pollIntervalMs?: number;
35
+ timeoutMs?: number;
36
+ logFilter?: JobLogFilter;
37
+ logId?: string;
38
+ summary?: boolean;
39
+ }
4
40
  interface ComputeNextPollDelayMsOptions {
5
41
  pollCount: number;
6
42
  baseIntervalMs: number;
@@ -12,6 +48,7 @@ interface ComputeNextPollDelayMsOptions {
12
48
  * capped at MAX_POLL_INTERVAL_MS (or baseIntervalMs if it's larger).
13
49
  */
14
50
  export declare function computeNextPollDelayMs({ pollCount, baseIntervalMs, adaptiveEnabled, }: ComputeNextPollDelayMsOptions): number;
51
+ export declare function parseJobLogProgress(log: string, elapsedMs?: number): JobLogProgress | undefined;
15
52
  export declare class JobsResource extends BaseResource {
16
53
  /** List jobs in a project. */
17
54
  list(projectKey?: string): Promise<JobSummary[]>;
@@ -24,36 +61,41 @@ export declare class JobsResource extends BaseResource {
24
61
  */
25
62
  log(jobId: string, opts?: {
26
63
  activity?: string;
64
+ logId?: string;
27
65
  maxLogLines?: number;
28
66
  projectKey?: string;
29
67
  }): Promise<string>;
68
+ logFromUrl(logUrl: string, opts?: {
69
+ maxLogLines?: number;
70
+ }): Promise<string>;
30
71
  /**
31
- * Start a build job for a dataset or managed folder.
72
+ * Start a build job for one or more dataset or managed-folder outputs.
32
73
  * Returns the new job's ID.
33
74
  */
34
- build(targetId: string, opts?: {
35
- buildMode?: BuildMode;
36
- autoUpdateSchema?: boolean;
37
- projectKey?: string;
38
- targetType?: JobBuildTargetType;
39
- }): Promise<{
75
+ buildOutputs(targets: JobBuildTarget[], opts?: JobBuildOptions): Promise<{
76
+ jobId: string;
77
+ }>;
78
+ /**
79
+ * Start a build job for a single dataset or managed folder.
80
+ * Returns the new job's ID.
81
+ */
82
+ build(targetId: string, opts?: JobBuildOptions): Promise<{
40
83
  jobId: string;
41
84
  }>;
85
+ /**
86
+ * Build one or more dataset or managed-folder outputs and wait for a terminal state.
87
+ * Combines {@link buildOutputs} then {@link wait}.
88
+ */
89
+ buildAndWaitOutputs(targets: JobBuildTarget[], opts?: JobBuildAndWaitOptions): Promise<JobWaitResult & {
90
+ logSummary?: JobLogSummary;
91
+ }>;
42
92
  /**
43
93
  * Build a dataset or managed folder and wait for the job to reach a terminal state.
44
94
  * Combines {@link build} then {@link wait}.
45
95
  */
46
- buildAndWait(targetId: string, opts?: {
47
- buildMode?: BuildMode;
48
- autoUpdateSchema?: boolean;
49
- activity?: string;
50
- includeLogs?: boolean;
51
- maxLogLines?: number;
52
- pollIntervalMs?: number;
53
- timeoutMs?: number;
54
- projectKey?: string;
55
- targetType?: JobBuildTargetType;
56
- }): Promise<JobWaitResult>;
96
+ buildAndWait(targetId: string, opts?: JobBuildAndWaitOptions): Promise<JobWaitResult & {
97
+ logSummary?: JobLogSummary;
98
+ }>;
57
99
  /**
58
100
  * Poll a job until it reaches a terminal state or times out.
59
101
  *
@@ -65,11 +107,16 @@ export declare class JobsResource extends BaseResource {
65
107
  wait(jobId: string, opts?: {
66
108
  activity?: string;
67
109
  includeLogs?: boolean;
110
+ logFilter?: JobLogFilter;
111
+ logId?: string;
68
112
  maxLogLines?: number;
69
113
  pollIntervalMs?: number;
114
+ summary?: boolean;
70
115
  timeoutMs?: number;
71
116
  projectKey?: string;
72
- }): Promise<JobWaitResult>;
117
+ }): Promise<JobWaitResult & {
118
+ logSummary?: JobLogSummary;
119
+ }>;
73
120
  /** Request a job abort. */
74
121
  abort(jobId: string, projectKey?: string): Promise<void>;
75
122
  }
@@ -35,6 +35,129 @@ export function computeNextPollDelayMs({ pollCount, baseIntervalMs, adaptiveEnab
35
35
  function sleep(ms) {
36
36
  return new Promise((resolve) => setTimeout(resolve, ms));
37
37
  }
38
/** Partition written to `targetPartition` for non-dataset outputs when none is supplied. */
const DEFAULT_TARGET_PARTITION = "NP";
/**
 * Build one entry of a job definition's `outputs` array.
 *
 * Per-target values win over the defaults: the type falls back to
 * `defaultTargetType` and then `"DATASET"`, the project key to
 * `defaultProjectKey`, the partition to `defaultPartition`.
 *
 * @param target - Build target (`id`, optional `type`, `projectKey`, `partition`).
 * @param defaultProjectKey - Project key used when the target has none.
 * @param defaultPartition - Partition used when the target has none.
 * @param defaultTargetType - Target type used when the target has none.
 * @returns For `"DATASET"`: `{ projectKey, id, type }` plus `partition` when it is
 *   not `undefined`. Otherwise the same base plus the three `targetManagedFolder*` /
 *   `targetPartition` fields.
 */
function jobBuildOutput(target, defaultProjectKey, defaultPartition, defaultTargetType) {
    const kind = target.type ?? defaultTargetType ?? "DATASET";
    const owningProject = target.projectKey ?? defaultProjectKey;
    const requestedPartition = target.partition ?? defaultPartition;
    const base = { projectKey: owningProject, id: target.id, type: kind };
    if (kind !== "DATASET") {
        return {
            ...base,
            targetManagedFolderProjectKey: owningProject,
            targetManagedFolder: target.id,
            targetPartition: requestedPartition ?? DEFAULT_TARGET_PARTITION,
        };
    }
    // Datasets only carry a partition when one was actually specified.
    return requestedPartition === undefined ? base : { ...base, partition: requestedPartition };
}
55
/**
 * Build the job definition payload for a set of build targets.
 *
 * @param targets - Build targets; each is converted with `jobBuildOutput`, using
 *   `opts.partition` and `opts.targetType` as defaults.
 * @param defaultProjectKey - Project key for targets that do not name one.
 * @param opts - Optional build options (`buildMode`, `autoUpdateSchema`, `partition`, `targetType`).
 * @returns `{ outputs, type }`, where `type` defaults to `"NON_RECURSIVE_FORCED_BUILD"`.
 *   `autoUpdateSchemaBeforeEachRecipeRun: true` is added only when
 *   `opts.autoUpdateSchema` is truthy and every target resolves to `"DATASET"`.
 * @throws {Error} When `targets` is empty.
 */
function jobBuildDefinition(targets, defaultProjectKey, opts) {
    if (targets.length === 0) {
        throw new Error("At least one build target is required.");
    }
    const outputs = targets.map((target) => jobBuildOutput(target, defaultProjectKey, opts?.partition, opts?.targetType));
    const definition = {
        outputs,
        type: opts?.buildMode ?? "NON_RECURSIVE_FORCED_BUILD",
    };
    if (!opts?.autoUpdateSchema) {
        return definition;
    }
    const resolvesToDataset = (target) => (target.type ?? opts?.targetType ?? "DATASET") === "DATASET";
    return targets.every(resolvesToDataset)
        ? { ...definition, autoUpdateSchemaBeforeEachRecipeRun: true }
        : definition;
}
69
/**
 * Split a log into lines on LF or CRLF and strip trailing whitespace from each line.
 *
 * @param log - Raw log text.
 * @returns One entry per line; an empty log yields a single empty string.
 */
function jobLogLines(log) {
    const cleaned = [];
    for (const rawLine of log.split(/\r?\n/)) {
        cleaned.push(rawLine.trimEnd());
    }
    return cleaned;
}
72
/**
 * Decide whether a single log line passes the given filter.
 *
 * - `"stdout"`: the line contains "stdout" (any case) or starts with `">>> "`.
 * - `"stderr"`: the line contains "stderr" (any case).
 * - `"errors"`: the line contains error, failed, failure, exception or traceback
 *   as a whole word (any case).
 * - `"user"`: the line does not start with a `YYYY-MM-DD` / `YYYY/MM/DD` style
 *   date and contains neither "backend-log" nor "debug" (any case).
 *
 * @param line - Log line to test.
 * @param filter - One of the filter names above.
 * @returns A boolean for the filters listed; `undefined` for any other value.
 */
function lineMatchesLogFilter(line, filter) {
    const lowered = line.toLowerCase();
    if (filter === "stdout") {
        return lowered.includes("stdout") || line.startsWith(">>> ");
    }
    if (filter === "stderr") {
        return lowered.includes("stderr");
    }
    if (filter === "errors") {
        return /\b(error|failed|failure|exception|traceback)\b/i.test(line);
    }
    if (filter === "user") {
        const looksLikeSystemLine = /^\d{4}[-/]\d{2}[-/]\d{2}/.test(line)
            || lowered.includes("backend-log")
            || lowered.includes("debug");
        return !looksLikeSystemLine;
    }
    return undefined;
}
87
/**
 * Keep only the log lines that pass `filter`.
 *
 * @param log - Raw log text.
 * @param filter - Filter name forwarded to `lineMatchesLogFilter`; when falsy the
 *   log is returned unchanged.
 * @returns The matching lines (as produced by `jobLogLines`) joined with "\n".
 */
function filterJobLog(log, filter) {
    if (filter) {
        const kept = [];
        for (const line of jobLogLines(log)) {
            if (lineMatchesLogFilter(line, filter)) {
                kept.push(line);
            }
        }
        return kept.join("\n");
    }
    return log;
}
92
/**
 * Truncate a log to its last `maxLines` lines.
 *
 * @param log - Raw log text; a falsy value yields "".
 * @param maxLines - Line cap; defaults to `DEFAULT_MAX_LOG_LINES`. `0` and `-1`
 *   disable truncation.
 * @returns The log unchanged when it fits (a single trailing line break is not
 *   counted as a line); otherwise the last `Math.max(1, maxLines)` lines joined
 *   with "\n", keeping a trailing "\n" if the input ended with a line break.
 */
function limitJobLog(log, maxLines) {
    if (!log) {
        return "";
    }
    const cap = maxLines ?? DEFAULT_MAX_LOG_LINES;
    const unlimited = cap === 0 || cap === -1;
    if (unlimited) {
        return log;
    }
    const rows = log.split(/\r?\n/);
    // A final empty element means the log ended with a line break; don't count it.
    const endsWithBreak = rows.length > 0 && rows[rows.length - 1] === "";
    if (endsWithBreak) {
        rows.pop();
    }
    if (rows.length <= cap) {
        return log;
    }
    const keep = Math.max(1, cap);
    const tail = rows.slice(-keep).join("\n");
    if (endsWithBreak) {
        return `${tail}\n`;
    }
    return tail;
}
107
/**
 * Convert a digit string that may contain comma separators into a number.
 *
 * @param value - Text such as "1,234".
 * @returns `Number(...)` of the text with every comma removed.
 */
function parsedCounterValue(value) {
    const withoutSeparators = value.split(",").join("");
    return Number(withoutSeparators);
}
110
/**
 * Extract progress information from a job log.
 *
 * Each non-blank, trimmed line is scanned for:
 * - `<scanned|matched|joined|written|emitted> <number>` pairs (any case), which
 *   set the counter of that name;
 * - `<number> rows successfully written`, which sets `counters.written` and
 *   marks the line as the done line;
 * - the text "done!" (any case), which also marks the line as the done line.
 * Later lines overwrite earlier values. Numbers may contain comma separators.
 *
 * @param log - Raw log text.
 * @param elapsedMs - Elapsed time used to derive `rowsPerMinute`; the rate is
 *   only reported when this is defined and greater than zero.
 * @returns `undefined` when no line matched. Otherwise the last matching line,
 *   the last done line (if any), the counters, and `rowsPerMinute` computed from
 *   `counters.written` (falling back to `counters.emitted`).
 */
export function parseJobLogProgress(log, elapsedMs) {
    const counterPattern = /\b(scanned|matched|joined|written|emitted)\s+([0-9][0-9,]*)/gi;
    const writtenPattern = /\b([0-9][0-9,]*)\s+rows\s+successfully\s+written\b/i;
    const toCount = (digits) => Number(digits.replace(/,/g, ""));

    const counters = {};
    let lastProgressLine;
    let doneLine;
    for (const rawLine of log.split(/\r?\n/)) {
        const text = rawLine.trim();
        if (text === "") {
            continue;
        }
        let isProgress = false;
        for (const [, label, digits] of text.matchAll(counterPattern)) {
            counters[label.toLowerCase()] = toCount(digits);
            isProgress = true;
        }
        const writtenMatch = writtenPattern.exec(text);
        if (writtenMatch !== null) {
            counters["written"] = toCount(writtenMatch[1]);
            doneLine = text;
            isProgress = true;
        }
        if (text.toLowerCase().includes("done!")) {
            doneLine = text;
            isProgress = true;
        }
        if (isProgress) {
            lastProgressLine = text;
        }
    }
    if (lastProgressLine === undefined && doneLine === undefined) {
        return undefined;
    }

    const rowsOut = counters["written"] ?? counters["emitted"];
    const canRate = rowsOut !== undefined && elapsedMs !== undefined && elapsedMs > 0;
    const rowsPerMinute = canRate ? rowsOut / (elapsedMs / 60000) : undefined;
    return {
        ...(lastProgressLine ? { lastProgressLine } : {}),
        ...(doneLine ? { doneLine } : {}),
        counters,
        ...(rowsPerMinute !== undefined ? { rowsPerMinute } : {}),
    };
}
150
/**
 * Summarize a job log: terminal state, non-blank line count, the trailing lines,
 * and any progress parsed from the log.
 *
 * `maxLines` follows the same convention as `limitJobLog`: `0` and `-1` mean
 * "no limit". Previously those values fell through to `Math.max(1, maxLines)`
 * and silently produced a one-line summary, even though callers pass the same
 * `maxLogLines` option to both helpers.
 *
 * @param state - Job state copied into the summary.
 * @param log - Log text (already filtered by the caller, if filtering applies).
 * @param maxLines - Maximum number of trailing non-blank lines to include;
 *   `0` / `-1` include all of them, any other value keeps at least one line.
 * @param elapsedMs - Forwarded to `parseJobLogProgress` for the rows-per-minute rate.
 * @returns `{ state, lineCount, lines }` plus `progress` when the log contained
 *   recognizable progress output.
 */
function summarizeJobLog(state, log, maxLines, elapsedMs) {
    const lines = jobLogLines(log).map((line) => line.trim()).filter((line) => line.length > 0);
    const unlimited = maxLines === 0 || maxLines === -1;
    const summaryLines = unlimited ? lines.slice() : lines.slice(-Math.max(1, maxLines));
    const progress = parseJobLogProgress(log, elapsedMs);
    return {
        state,
        lineCount: lines.length,
        lines: summaryLines,
        ...(progress ? { progress, } : {}),
    };
}
38
161
  export class JobsResource extends BaseResource {
39
162
  /** List jobs in a project. */
40
163
  async list(projectKey) {
@@ -55,57 +178,72 @@ export class JobsResource extends BaseResource {
55
178
  async log(jobId, opts) {
56
179
  const jobEnc = encodeURIComponent(jobId);
57
180
  const query = opts?.activity ? `?activity=${encodeURIComponent(opts.activity)}` : "";
58
- const log = await this.client.getText(`/public/api/projects/${this.enc(opts?.projectKey)}/jobs/${jobEnc}/log/${query}`);
59
- if (!log)
60
- return "";
61
- const limit = opts?.maxLogLines ?? DEFAULT_MAX_LOG_LINES;
62
- if (limit === 0 || limit === -1) {
63
- return log;
64
- }
65
- const lines = log.split("\n");
66
- if (lines.length > limit) {
67
- return lines.slice(-limit).join("\n");
181
+ // DSS cat-activity-log URLs require a browser session; API-key callers must use the public log endpoint.
182
+ const path = `/public/api/projects/${this.enc(opts?.projectKey)}/jobs/${jobEnc}/log/${query}`;
183
+ const log = await this.client.getText(path);
184
+ return limitJobLog(log, opts?.maxLogLines);
185
+ }
186
+ async logFromUrl(logUrl, opts) {
187
+ const parsed = new URL(logUrl, "http://dss.local");
188
+ const projectKey = parsed.searchParams.get("projectKey") ?? undefined;
189
+ const jobId = parsed.searchParams.get("jobId") ?? undefined;
190
+ const activity = parsed.searchParams.get("activityId") ?? undefined;
191
+ if (!projectKey || !jobId || !activity) {
192
+ throw new Error("Log URL must include projectKey, jobId, and activityId query parameters.");
68
193
  }
69
- return log;
194
+ return this.log(jobId, { activity, projectKey, maxLogLines: opts?.maxLogLines, });
70
195
  }
71
196
  /**
72
- * Start a build job for a dataset or managed folder.
197
+ * Start a build job for one or more dataset or managed-folder outputs.
73
198
  * Returns the new job's ID.
74
199
  */
75
- async build(targetId, opts) {
200
+ async buildOutputs(targets, opts) {
76
201
  const pk = this.resolveProjectKey(opts?.projectKey);
77
202
  const enc = encodeURIComponent(pk);
78
- const targetType = opts?.targetType ?? "DATASET";
79
- const jobDef = {
80
- outputs: [{ projectKey: pk, id: targetId, type: targetType, },],
81
- type: opts?.buildMode ?? "NON_RECURSIVE_FORCED_BUILD",
82
- };
83
- if (opts?.autoUpdateSchema && targetType === "DATASET") {
84
- jobDef.autoUpdateSchemaBeforeEachRecipeRun = true;
85
- }
203
+ const jobDef = jobBuildDefinition(targets, pk, opts);
86
204
  const job = await this.client.post(`/public/api/projects/${enc}/jobs/`, jobDef);
87
205
  return { jobId: job.id, };
88
206
  }
89
207
  /**
90
- * Build a dataset or managed folder and wait for the job to reach a terminal state.
91
- * Combines {@link build} then {@link wait}.
208
+ * Start a build job for a single dataset or managed folder.
209
+ * Returns the new job's ID.
92
210
  */
93
- async buildAndWait(targetId, opts) {
94
- const { jobId, } = await this.build(targetId, {
95
- buildMode: opts?.buildMode,
96
- autoUpdateSchema: opts?.autoUpdateSchema,
97
- projectKey: opts?.projectKey,
98
- targetType: opts?.targetType,
99
- });
211
+ async build(targetId, opts) {
212
+ return this.buildOutputs([{
213
+ id: targetId,
214
+ type: opts?.targetType,
215
+ partition: opts?.partition,
216
+ },], opts);
217
+ }
218
+ /**
219
+ * Build one or more dataset or managed-folder outputs and wait for a terminal state.
220
+ * Combines {@link buildOutputs} then {@link wait}.
221
+ */
222
+ async buildAndWaitOutputs(targets, opts) {
223
+ const { jobId, } = await this.buildOutputs(targets, opts);
100
224
  return this.wait(jobId, {
101
225
  activity: opts?.activity,
102
226
  includeLogs: opts?.includeLogs,
227
+ logFilter: opts?.logFilter,
228
+ logId: opts?.logId,
103
229
  maxLogLines: opts?.maxLogLines,
104
230
  pollIntervalMs: opts?.pollIntervalMs,
231
+ summary: opts?.summary,
105
232
  timeoutMs: opts?.timeoutMs,
106
233
  projectKey: opts?.projectKey,
107
234
  });
108
235
  }
236
+ /**
237
+ * Build a dataset or managed folder and wait for the job to reach a terminal state.
238
+ * Combines {@link build} then {@link wait}.
239
+ */
240
+ async buildAndWait(targetId, opts) {
241
+ return this.buildAndWaitOutputs([{
242
+ id: targetId,
243
+ type: opts?.targetType,
244
+ partition: opts?.partition,
245
+ },], opts);
246
+ }
109
247
  /**
110
248
  * Poll a job until it reaches a terminal state or times out.
111
249
  *
@@ -133,12 +271,20 @@ export class JobsResource extends BaseResource {
133
271
  if (isTerminalState(state)) {
134
272
  const success = isSuccessfulTerminalState(state);
135
273
  let log;
136
- if (opts?.includeLogs) {
137
- log = await this.log(jobId, {
274
+ let logSummary;
275
+ if (opts?.includeLogs || opts?.summary) {
276
+ const rawLog = await this.log(jobId, {
138
277
  activity: opts.activity,
139
- maxLogLines: opts.maxLogLines,
278
+ maxLogLines: opts.summary ? 0 : opts.maxLogLines,
279
+ logId: opts.logId,
140
280
  projectKey: opts.projectKey,
141
281
  });
282
+ const filteredLog = filterJobLog(rawLog, opts.logFilter);
283
+ if (opts.includeLogs)
284
+ log = limitJobLog(filteredLog, opts.maxLogLines);
285
+ if (opts.summary) {
286
+ logSummary = summarizeJobLog(state, filteredLog, opts.maxLogLines ?? 20, elapsedMs);
287
+ }
142
288
  }
143
289
  return {
144
290
  success,
@@ -154,6 +300,7 @@ export class JobsResource extends BaseResource {
154
300
  total: gs.total ?? null,
155
301
  },
156
302
  ...(log !== undefined ? { log, } : {}),
303
+ ...(logSummary !== undefined ? { logSummary, } : {}),
157
304
  };
158
305
  }
159
306
  // Timeout — return failure result, don't throw