dataiku-sdk 0.5.1 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/packages/types/src/index.d.ts +4 -0
- package/dist/packages/types/src/index.js +1 -0
- package/dist/src/cli.js +1353 -128
- package/dist/src/errors.js +12 -0
- package/dist/src/index.d.ts +5 -5
- package/dist/src/index.js +2 -2
- package/dist/src/resources/connections.d.ts +10 -0
- package/dist/src/resources/connections.js +16 -0
- package/dist/src/resources/datasets.d.ts +36 -0
- package/dist/src/resources/datasets.js +80 -0
- package/dist/src/resources/jobs.d.ts +66 -19
- package/dist/src/resources/jobs.js +180 -33
- package/dist/src/resources/recipes.d.ts +80 -1
- package/dist/src/resources/recipes.js +349 -0
- package/dist/src/resources/scenarios.d.ts +38 -2
- package/dist/src/resources/scenarios.js +162 -3
- package/dist/src/resources/sql.js +84 -3
- package/dist/src/skill.d.ts +2 -2
- package/dist/src/skill.js +3 -3
- package/package.json +1 -1
- package/packages/types/dist/index.d.ts +4 -0
- package/packages/types/dist/index.js +1 -0
package/dist/src/errors.js
CHANGED
|
@@ -71,6 +71,18 @@ export function classifyDataikuError(status, body) {
|
|
|
71
71
|
retryHint: "Requested object was not found. Verify projectKey and object identifiers before retrying.",
|
|
72
72
|
};
|
|
73
73
|
}
|
|
74
|
+
const isSqlEngineValidation = status >= 400
|
|
75
|
+
&& (lowerBody.includes("column_not_found")
|
|
76
|
+
|| lowerBody.includes("table_not_found")
|
|
77
|
+
|| lowerBody.includes("no_such_table")
|
|
78
|
+
|| lowerBody.includes("column does not exist"));
|
|
79
|
+
if (isSqlEngineValidation) {
|
|
80
|
+
return {
|
|
81
|
+
category: "validation",
|
|
82
|
+
retryable: false,
|
|
83
|
+
retryHint: "Athena/SQL engine rejected the query: check column names, table names, and schema with dss dataset schema or dss connection tables. Do not retry unchanged SQL.",
|
|
84
|
+
};
|
|
85
|
+
}
|
|
74
86
|
const isServerValidationLike = status >= 500
|
|
75
87
|
&& (lowerBody.includes("invalid")
|
|
76
88
|
|| lowerBody.includes("validation")
|
package/dist/src/index.d.ts
CHANGED
|
@@ -3,19 +3,19 @@ export { type CredentialValidationOptions, type CredentialValidationResult, vali
|
|
|
3
3
|
export { deleteCredentials, type DssCredentials, getConfigDir, getCredentialsPath, loadCredentials, maskApiKey, saveCredentials, } from "./config.js";
|
|
4
4
|
export { DataikuError, type DataikuErrorCategory, type DataikuErrorTaxonomy, type DataikuRetryMetadata, } from "./errors.js";
|
|
5
5
|
export { CodeEnvsResource, } from "./resources/code-envs.js";
|
|
6
|
-
export { ConnectionsResource, } from "./resources/connections.js";
|
|
6
|
+
export { type ConnectionSchemaListOptions, ConnectionsResource, type ConnectionTableListOptions, } from "./resources/connections.js";
|
|
7
7
|
export { DashboardsResource, } from "./resources/dashboards.js";
|
|
8
8
|
export { DataQualityResource, } from "./resources/data-quality.js";
|
|
9
|
-
export { DatasetsResource, } from "./resources/datasets.js";
|
|
9
|
+
export { type DatasetBuildValidationResult, type DatasetCloneOptions, type DatasetCloneResult, type DatasetSchemaColumnInput, DatasetsResource, } from "./resources/datasets.js";
|
|
10
10
|
export { type FlowZoneItemInput, FlowZonesResource, } from "./resources/flow-zones.js";
|
|
11
11
|
export { FoldersResource, } from "./resources/folders.js";
|
|
12
12
|
export { FuturesResource, } from "./resources/futures.js";
|
|
13
13
|
export { InsightsResource, } from "./resources/insights.js";
|
|
14
|
-
export { computeNextPollDelayMs, type JobBuildTargetType, JobsResource, } from "./resources/jobs.js";
|
|
14
|
+
export { computeNextPollDelayMs, type JobBuildAndWaitOptions, type JobBuildOptions, type JobBuildTarget, type JobBuildTargetType, type JobLogFilter, type JobLogProgress, type JobLogSummary, JobsResource, parseJobLogProgress, } from "./resources/jobs.js";
|
|
15
15
|
export { NotebooksResource, } from "./resources/notebooks.js";
|
|
16
16
|
export { type FlowMapResult, ProjectsResource, } from "./resources/projects.js";
|
|
17
|
-
export { RecipesResource, } from "./resources/recipes.js";
|
|
18
|
-
export { ScenariosResource, } from "./resources/scenarios.js";
|
|
17
|
+
export { type RecipeCloneOptions, type RecipeCloneResult, type RecipeGraphReference, type RecipeGraphValidationResult, type RecipeRunOptions, type RecipeRunOutput, type RecipeRunResult, RecipesResource, } from "./resources/recipes.js";
|
|
18
|
+
export { normalizeScenarioUpdateData, SCENARIO_CANONICAL_EDITABLE_FIELDS, type ScenarioFieldChange, type ScenarioFieldMismatch, ScenariosResource, type ScenarioUpdateNormalization, type ScenarioUpdatePreview, scenarioUpdatePreview, type ScenarioUpdateResult, } from "./resources/scenarios.js";
|
|
19
19
|
export { SqlResource, } from "./resources/sql.js";
|
|
20
20
|
export { VariablesResource, } from "./resources/variables.js";
|
|
21
21
|
export { WikiResource, } from "./resources/wiki.js";
|
package/dist/src/index.js
CHANGED
|
@@ -15,11 +15,11 @@ export { FlowZonesResource, } from "./resources/flow-zones.js";
|
|
|
15
15
|
export { FoldersResource, } from "./resources/folders.js";
|
|
16
16
|
export { FuturesResource, } from "./resources/futures.js";
|
|
17
17
|
export { InsightsResource, } from "./resources/insights.js";
|
|
18
|
-
export { computeNextPollDelayMs, JobsResource, } from "./resources/jobs.js";
|
|
18
|
+
export { computeNextPollDelayMs, JobsResource, parseJobLogProgress, } from "./resources/jobs.js";
|
|
19
19
|
export { NotebooksResource, } from "./resources/notebooks.js";
|
|
20
20
|
export { ProjectsResource, } from "./resources/projects.js";
|
|
21
21
|
export { RecipesResource, } from "./resources/recipes.js";
|
|
22
|
-
export { ScenariosResource, } from "./resources/scenarios.js";
|
|
22
|
+
export { normalizeScenarioUpdateData, SCENARIO_CANONICAL_EDITABLE_FIELDS, ScenariosResource, scenarioUpdatePreview, } from "./resources/scenarios.js";
|
|
23
23
|
export { SqlResource, } from "./resources/sql.js";
|
|
24
24
|
export { VariablesResource, } from "./resources/variables.js";
|
|
25
25
|
export { WikiResource, } from "./resources/wiki.js";
|
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
import type { ConnectionSummary } from "../schemas.js";
|
|
2
2
|
import { BaseResource } from "./base.js";
|
|
3
|
+
export interface ConnectionSchemaListOptions {
|
|
4
|
+
connection: string;
|
|
5
|
+
projectKey?: string;
|
|
6
|
+
}
|
|
7
|
+
export interface ConnectionTableListOptions extends ConnectionSchemaListOptions {
|
|
8
|
+
catalog?: string;
|
|
9
|
+
schema?: string;
|
|
10
|
+
}
|
|
3
11
|
export declare class ConnectionsResource extends BaseResource {
|
|
4
12
|
/**
|
|
5
13
|
* Returns sorted list of all connection names visible to the current user.
|
|
@@ -19,4 +27,6 @@ export declare class ConnectionsResource extends BaseResource {
|
|
|
19
27
|
mode?: "fast" | "rich";
|
|
20
28
|
projectKey?: string;
|
|
21
29
|
}): Promise<ConnectionSummary[]>;
|
|
30
|
+
schemas(opts: ConnectionSchemaListOptions): Promise<string[]>;
|
|
31
|
+
tables(opts: ConnectionTableListOptions): Promise<Record<string, unknown>>;
|
|
22
32
|
}
|
|
@@ -79,4 +79,20 @@ export class ConnectionsResource extends BaseResource {
|
|
|
79
79
|
}
|
|
80
80
|
return inferRichConnectionsFromDatasets(this.client, projectEnc);
|
|
81
81
|
}
|
|
82
|
+
async schemas(opts) {
|
|
83
|
+
const pk = this.resolveProjectKey(opts.projectKey);
|
|
84
|
+
const params = new URLSearchParams();
|
|
85
|
+
params.set("connectionName", opts.connection);
|
|
86
|
+
return this.client.get(`/public/api/projects/${encodeURIComponent(pk)}/datasets/tables-import/actions/list-schemas?${params.toString()}`);
|
|
87
|
+
}
|
|
88
|
+
async tables(opts) {
|
|
89
|
+
const pk = this.resolveProjectKey(opts.projectKey);
|
|
90
|
+
const params = new URLSearchParams();
|
|
91
|
+
params.set("connectionName", opts.connection);
|
|
92
|
+
if (opts.catalog !== undefined)
|
|
93
|
+
params.set("catalogName", opts.catalog);
|
|
94
|
+
if (opts.schema !== undefined)
|
|
95
|
+
params.set("schemaName", opts.schema);
|
|
96
|
+
return this.client.get(`/public/api/projects/${encodeURIComponent(pk)}/datasets/tables-import/actions/list-tables?${params.toString()}`);
|
|
97
|
+
}
|
|
82
98
|
}
|
|
@@ -1,5 +1,34 @@
|
|
|
1
1
|
import { BaseResource } from "./base.js";
|
|
2
2
|
import type { DatasetCreateOptions, DatasetDetails, DatasetSchema, DatasetSummary } from "../schemas.js";
|
|
3
|
+
export interface DatasetBuildValidationResult {
|
|
4
|
+
valid: boolean;
|
|
5
|
+
datasetName: string;
|
|
6
|
+
projectKey: string;
|
|
7
|
+
type: string | null;
|
|
8
|
+
path: string | null;
|
|
9
|
+
formatType: string | null;
|
|
10
|
+
warnings: string[];
|
|
11
|
+
}
|
|
12
|
+
export interface DatasetSchemaColumnInput {
|
|
13
|
+
name: string;
|
|
14
|
+
type: string;
|
|
15
|
+
comment?: string;
|
|
16
|
+
}
|
|
17
|
+
export interface DatasetCloneOptions {
|
|
18
|
+
projectKey?: string;
|
|
19
|
+
path?: string;
|
|
20
|
+
table?: string;
|
|
21
|
+
metastoreTableName?: string;
|
|
22
|
+
overrides?: Record<string, unknown>;
|
|
23
|
+
allowSamePath?: boolean;
|
|
24
|
+
}
|
|
25
|
+
export interface DatasetCloneResult {
|
|
26
|
+
source: string;
|
|
27
|
+
target: string;
|
|
28
|
+
projectKey: string;
|
|
29
|
+
created: Record<string, unknown>;
|
|
30
|
+
settings: Record<string, unknown>;
|
|
31
|
+
}
|
|
3
32
|
/**
|
|
4
33
|
* Compare streamed TSV header columns against a known dataset schema.
|
|
5
34
|
* Returns an array of warning strings (empty if all columns match).
|
|
@@ -7,6 +36,7 @@ import type { DatasetCreateOptions, DatasetDetails, DatasetSchema, DatasetSummar
|
|
|
7
36
|
export declare function validateStreamColumns(headerRow: string[], expectedColumns: {
|
|
8
37
|
name: string;
|
|
9
38
|
}[]): string[];
|
|
39
|
+
export declare function buildDatasetCloneSettings(source: DatasetDetails, targetName: string, projectKey: string, opts: DatasetCloneOptions): Record<string, unknown>;
|
|
10
40
|
export declare class DatasetsResource extends BaseResource {
|
|
11
41
|
/** List all datasets in a project. */
|
|
12
42
|
list(projectKey?: string): Promise<DatasetSummary[]>;
|
|
@@ -14,6 +44,8 @@ export declare class DatasetsResource extends BaseResource {
|
|
|
14
44
|
get(datasetName: string, projectKey?: string): Promise<DatasetDetails>;
|
|
15
45
|
/** Get dataset schema (column names and types). */
|
|
16
46
|
schema(datasetName: string, projectKey?: string): Promise<DatasetSchema>;
|
|
47
|
+
/** Replace dataset schema columns directly through the schema endpoint. */
|
|
48
|
+
updateSchema(datasetName: string, columns: DatasetSchemaColumnInput[], projectKey?: string): Promise<void>;
|
|
17
49
|
/**
|
|
18
50
|
* Preview dataset data as CSV text.
|
|
19
51
|
* Streams TSV from the API, converts to CSV, and returns up to `maxRows`
|
|
@@ -51,6 +83,10 @@ export declare class DatasetsResource extends BaseResource {
|
|
|
51
83
|
* from existing datasets on the same connection.
|
|
52
84
|
*/
|
|
53
85
|
create(opts: DatasetCreateOptions): Promise<Record<string, unknown>>;
|
|
86
|
+
/** Validate common build blockers before running a dataset build. */
|
|
87
|
+
validateBuildSettings(datasetName: string, projectKey?: string): Promise<DatasetBuildValidationResult>;
|
|
88
|
+
/** Clone dataset settings, preserving connection/storage, format, and schema fields. */
|
|
89
|
+
clone(sourceName: string, targetName: string, opts?: DatasetCloneOptions): Promise<DatasetCloneResult>;
|
|
54
90
|
/** Update a dataset by deep-merging a patch into the current definition. */
|
|
55
91
|
update(datasetName: string, data: Record<string, unknown>, projectKey?: string): Promise<void>;
|
|
56
92
|
/** Delete a dataset. */
|
|
@@ -306,6 +306,38 @@ function buildDatasetCreateBody(opts) {
|
|
|
306
306
|
managed: opts.managed ?? true,
|
|
307
307
|
};
|
|
308
308
|
}
|
|
309
|
+
/**
 * Build the settings object used to create a clone of `source`.
 *
 * The result carries `name` and `projectKey` plus, when present on the
 * source, `type`, `managed`, `formatType`, `formatParams` and `schema`.
 * `params` is a shallow copy of `source.params` with the `path`, `table`
 * (which also forces `mode: "table"`) and `metastoreTableName` options
 * layered on top; it is omitted when it would be empty. `opts.overrides`,
 * when given, is deep-merged over the assembled object.
 *
 * @param source Details of the dataset being cloned.
 * @param targetName Name of the new dataset.
 * @param projectKey Project key written into the settings.
 * @param opts Clone options; may be omitted (defaults to no options).
 * @returns The settings object for the new dataset.
 * @throws {Error} When the source is managed, has a string `params.path`,
 *   the final settings still point at that same path, and
 *   `opts.allowSamePath` is not `true`.
 */
export function buildDatasetCloneSettings(source, targetName, projectKey, opts = {}) {
    const params = {
        ...source.params,
        ...(opts.path !== undefined ? { path: opts.path, } : {}),
        ...(opts.table !== undefined ? { table: opts.table, mode: "table", } : {}),
        ...(opts.metastoreTableName !== undefined
            ? { metastoreTableName: opts.metastoreTableName, }
            : {}),
    };
    const cloned = {
        name: targetName,
        projectKey,
        ...(source.type !== undefined ? { type: source.type, } : {}),
        ...(source.managed !== undefined ? { managed: source.managed, } : {}),
        ...(Object.keys(params).length > 0 ? { params, } : {}),
        ...(source.formatType !== undefined ? { formatType: source.formatType, } : {}),
        ...(source.formatParams !== undefined ? { formatParams: source.formatParams, } : {}),
        ...(source.schema !== undefined ? { schema: source.schema, } : {}),
    };
    const settings = opts.overrides ? deepMerge(cloned, opts.overrides) : cloned;
    // Overrides may have replaced `params` with a non-object; treat that as "no path".
    const settingsParams = settings.params && typeof settings.params === "object" && !Array.isArray(settings.params)
        ? settings.params
        : {};
    const sourcePath = typeof source.params?.path === "string" ? source.params.path : undefined;
    // The guard runs on the final settings so a path set through overrides is honoured too.
    if (opts.allowSamePath !== true
        && source.managed === true
        && sourcePath !== undefined
        && settingsParams.path === sourcePath) {
        throw new Error(`Refusing to clone managed dataset "${source.name}" with the same storage path. Pass a new path or allowSamePath: true.`);
    }
    return settings;
}
|
|
309
341
|
// ---------------------------------------------------------------------------
|
|
310
342
|
// Resource
|
|
311
343
|
// ---------------------------------------------------------------------------
|
|
@@ -327,6 +359,11 @@ export class DatasetsResource extends BaseResource {
|
|
|
327
359
|
const raw = await this.client.get(`/public/api/projects/${this.enc(projectKey)}/datasets/${dsEnc}/schema`);
|
|
328
360
|
return this.client.safeParse(DatasetSchemaSchema, raw, "datasets.schema");
|
|
329
361
|
}
|
|
362
|
+
/** Replace dataset schema columns directly through the schema endpoint. */
|
|
363
|
+
async updateSchema(datasetName, columns, projectKey) {
|
|
364
|
+
const dsEnc = encodeURIComponent(datasetName);
|
|
365
|
+
await this.client.put(`/public/api/projects/${this.enc(projectKey)}/datasets/${dsEnc}/schema`, { columns, });
|
|
366
|
+
}
|
|
330
367
|
/**
|
|
331
368
|
* Preview dataset data as CSV text.
|
|
332
369
|
* Streams TSV from the API, converts to CSV, and returns up to `maxRows`
|
|
@@ -444,6 +481,49 @@ export class DatasetsResource extends BaseResource {
|
|
|
444
481
|
return this.client.post(`/public/api/projects/${enc}/datasets/`, body);
|
|
445
482
|
}
|
|
446
483
|
}
|
|
484
|
+
/** Validate common build blockers before running a dataset build. */
|
|
485
|
+
async validateBuildSettings(datasetName, projectKey) {
|
|
486
|
+
const pk = this.resolveProjectKey(projectKey);
|
|
487
|
+
const details = await this.get(datasetName, pk);
|
|
488
|
+
const params = details.params ?? {};
|
|
489
|
+
const type = details.type ?? null;
|
|
490
|
+
const path = typeof params.path === "string" && params.path.trim().length > 0
|
|
491
|
+
? params.path
|
|
492
|
+
: null;
|
|
493
|
+
const table = typeof params.table === "string" && params.table.trim().length > 0
|
|
494
|
+
? params.table
|
|
495
|
+
: null;
|
|
496
|
+
const normalizedType = (type ?? "").toLowerCase();
|
|
497
|
+
const fileBacked = !table
|
|
498
|
+
&& (normalizedType.includes("filesystem")
|
|
499
|
+
|| normalizedType.includes("uploaded")
|
|
500
|
+
|| normalizedType.includes("s3")
|
|
501
|
+
|| path !== null);
|
|
502
|
+
const formatType = details.formatType ?? null;
|
|
503
|
+
const warnings = [];
|
|
504
|
+
if (fileBacked && !path) {
|
|
505
|
+
warnings.push("File-backed dataset has no writable storage path configured.");
|
|
506
|
+
}
|
|
507
|
+
if (fileBacked && !formatType) {
|
|
508
|
+
warnings.push("File-backed dataset has no formatType configured.");
|
|
509
|
+
}
|
|
510
|
+
return {
|
|
511
|
+
valid: warnings.length === 0,
|
|
512
|
+
datasetName,
|
|
513
|
+
projectKey: pk,
|
|
514
|
+
type,
|
|
515
|
+
path,
|
|
516
|
+
formatType,
|
|
517
|
+
warnings,
|
|
518
|
+
};
|
|
519
|
+
}
|
|
520
|
+
/** Clone dataset settings, preserving connection/storage, format, and schema fields. */
|
|
521
|
+
async clone(sourceName, targetName, opts = {}) {
|
|
522
|
+
const pk = this.resolveProjectKey(opts.projectKey);
|
|
523
|
+
const settings = buildDatasetCloneSettings(await this.get(sourceName, pk), targetName, pk, opts);
|
|
524
|
+
const created = await this.client.post(`/public/api/projects/${encodeURIComponent(pk)}/datasets/`, settings);
|
|
525
|
+
return { source: sourceName, target: targetName, projectKey: pk, created, settings, };
|
|
526
|
+
}
|
|
447
527
|
/** Update a dataset by deep-merging a patch into the current definition. */
|
|
448
528
|
async update(datasetName, data, projectKey) {
|
|
449
529
|
const dsEnc = encodeURIComponent(datasetName);
|
|
@@ -1,6 +1,42 @@
|
|
|
1
1
|
import type { BuildMode, JobSummary, JobWaitResult } from "../schemas.js";
|
|
2
2
|
import { BaseResource } from "./base.js";
|
|
3
3
|
export type JobBuildTargetType = "DATASET" | "MANAGED_FOLDER";
|
|
4
|
+
export type JobLogFilter = "stdout" | "stderr" | "user" | "errors";
|
|
5
|
+
export interface JobLogProgress {
|
|
6
|
+
lastProgressLine?: string;
|
|
7
|
+
doneLine?: string;
|
|
8
|
+
counters: Record<string, number>;
|
|
9
|
+
rowsPerMinute?: number;
|
|
10
|
+
}
|
|
11
|
+
export interface JobLogSummary {
|
|
12
|
+
state: string;
|
|
13
|
+
lineCount: number;
|
|
14
|
+
lines: string[];
|
|
15
|
+
progress?: JobLogProgress;
|
|
16
|
+
}
|
|
17
|
+
export interface JobBuildTarget {
|
|
18
|
+
id: string;
|
|
19
|
+
type?: JobBuildTargetType;
|
|
20
|
+
projectKey?: string;
|
|
21
|
+
partition?: string;
|
|
22
|
+
}
|
|
23
|
+
export interface JobBuildOptions {
|
|
24
|
+
buildMode?: BuildMode;
|
|
25
|
+
autoUpdateSchema?: boolean;
|
|
26
|
+
projectKey?: string;
|
|
27
|
+
targetType?: JobBuildTargetType;
|
|
28
|
+
partition?: string;
|
|
29
|
+
}
|
|
30
|
+
export interface JobBuildAndWaitOptions extends JobBuildOptions {
|
|
31
|
+
activity?: string;
|
|
32
|
+
includeLogs?: boolean;
|
|
33
|
+
maxLogLines?: number;
|
|
34
|
+
pollIntervalMs?: number;
|
|
35
|
+
timeoutMs?: number;
|
|
36
|
+
logFilter?: JobLogFilter;
|
|
37
|
+
logId?: string;
|
|
38
|
+
summary?: boolean;
|
|
39
|
+
}
|
|
4
40
|
interface ComputeNextPollDelayMsOptions {
|
|
5
41
|
pollCount: number;
|
|
6
42
|
baseIntervalMs: number;
|
|
@@ -12,6 +48,7 @@ interface ComputeNextPollDelayMsOptions {
|
|
|
12
48
|
* capped at MAX_POLL_INTERVAL_MS (or baseIntervalMs if it's larger).
|
|
13
49
|
*/
|
|
14
50
|
export declare function computeNextPollDelayMs({ pollCount, baseIntervalMs, adaptiveEnabled, }: ComputeNextPollDelayMsOptions): number;
|
|
51
|
+
export declare function parseJobLogProgress(log: string, elapsedMs?: number): JobLogProgress | undefined;
|
|
15
52
|
export declare class JobsResource extends BaseResource {
|
|
16
53
|
/** List jobs in a project. */
|
|
17
54
|
list(projectKey?: string): Promise<JobSummary[]>;
|
|
@@ -24,36 +61,41 @@ export declare class JobsResource extends BaseResource {
|
|
|
24
61
|
*/
|
|
25
62
|
log(jobId: string, opts?: {
|
|
26
63
|
activity?: string;
|
|
64
|
+
logId?: string;
|
|
27
65
|
maxLogLines?: number;
|
|
28
66
|
projectKey?: string;
|
|
29
67
|
}): Promise<string>;
|
|
68
|
+
logFromUrl(logUrl: string, opts?: {
|
|
69
|
+
maxLogLines?: number;
|
|
70
|
+
}): Promise<string>;
|
|
30
71
|
/**
|
|
31
|
-
* Start a build job for
|
|
72
|
+
* Start a build job for one or more dataset or managed-folder outputs.
|
|
32
73
|
* Returns the new job's ID.
|
|
33
74
|
*/
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
75
|
+
buildOutputs(targets: JobBuildTarget[], opts?: JobBuildOptions): Promise<{
|
|
76
|
+
jobId: string;
|
|
77
|
+
}>;
|
|
78
|
+
/**
|
|
79
|
+
* Start a build job for a single dataset or managed folder.
|
|
80
|
+
* Returns the new job's ID.
|
|
81
|
+
*/
|
|
82
|
+
build(targetId: string, opts?: JobBuildOptions): Promise<{
|
|
40
83
|
jobId: string;
|
|
41
84
|
}>;
|
|
85
|
+
/**
|
|
86
|
+
* Build one or more dataset or managed-folder outputs and wait for a terminal state.
|
|
87
|
+
* Combines {@link buildOutputs} then {@link wait}.
|
|
88
|
+
*/
|
|
89
|
+
buildAndWaitOutputs(targets: JobBuildTarget[], opts?: JobBuildAndWaitOptions): Promise<JobWaitResult & {
|
|
90
|
+
logSummary?: JobLogSummary;
|
|
91
|
+
}>;
|
|
42
92
|
/**
|
|
43
93
|
* Build a dataset or managed folder and wait for the job to reach a terminal state.
|
|
44
94
|
* Combines {@link build} then {@link wait}.
|
|
45
95
|
*/
|
|
46
|
-
buildAndWait(targetId: string, opts?: {
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
activity?: string;
|
|
50
|
-
includeLogs?: boolean;
|
|
51
|
-
maxLogLines?: number;
|
|
52
|
-
pollIntervalMs?: number;
|
|
53
|
-
timeoutMs?: number;
|
|
54
|
-
projectKey?: string;
|
|
55
|
-
targetType?: JobBuildTargetType;
|
|
56
|
-
}): Promise<JobWaitResult>;
|
|
96
|
+
buildAndWait(targetId: string, opts?: JobBuildAndWaitOptions): Promise<JobWaitResult & {
|
|
97
|
+
logSummary?: JobLogSummary;
|
|
98
|
+
}>;
|
|
57
99
|
/**
|
|
58
100
|
* Poll a job until it reaches a terminal state or times out.
|
|
59
101
|
*
|
|
@@ -65,11 +107,16 @@ export declare class JobsResource extends BaseResource {
|
|
|
65
107
|
wait(jobId: string, opts?: {
|
|
66
108
|
activity?: string;
|
|
67
109
|
includeLogs?: boolean;
|
|
110
|
+
logFilter?: JobLogFilter;
|
|
111
|
+
logId?: string;
|
|
68
112
|
maxLogLines?: number;
|
|
69
113
|
pollIntervalMs?: number;
|
|
114
|
+
summary?: boolean;
|
|
70
115
|
timeoutMs?: number;
|
|
71
116
|
projectKey?: string;
|
|
72
|
-
}): Promise<JobWaitResult
|
|
117
|
+
}): Promise<JobWaitResult & {
|
|
118
|
+
logSummary?: JobLogSummary;
|
|
119
|
+
}>;
|
|
73
120
|
/** Request a job abort. */
|
|
74
121
|
abort(jobId: string, projectKey?: string): Promise<void>;
|
|
75
122
|
}
|
|
@@ -35,6 +35,129 @@ export function computeNextPollDelayMs({ pollCount, baseIntervalMs, adaptiveEnab
|
|
|
35
35
|
function sleep(ms) {
|
|
36
36
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
37
37
|
}
|
|
38
|
+
const DEFAULT_TARGET_PARTITION = "NP";
|
|
39
|
+
/**
 * Turn one build target into an entry of a job definition's `outputs`.
 *
 * Per-target values win over the supplied defaults; the type falls back to
 * "DATASET". Dataset outputs carry `partition` only when one is defined.
 * Any other type additionally gets the `targetManagedFolder*` fields and a
 * `targetPartition` that falls back to `DEFAULT_TARGET_PARTITION`.
 */
function jobBuildOutput(target, defaultProjectKey, defaultPartition, defaultTargetType) {
    const type = target.type ?? defaultTargetType ?? "DATASET";
    const projectKey = target.projectKey ?? defaultProjectKey;
    const partition = target.partition ?? defaultPartition;
    const base = { projectKey, id: target.id, type, };
    if (type !== "DATASET") {
        return {
            ...base,
            targetManagedFolderProjectKey: projectKey,
            targetManagedFolder: target.id,
            targetPartition: partition ?? DEFAULT_TARGET_PARTITION,
        };
    }
    return partition === undefined ? base : { ...base, partition, };
}
|
|
55
|
+
/**
 * Assemble the job definition for a list of build targets.
 *
 * `type` is the requested build mode, defaulting to
 * "NON_RECURSIVE_FORCED_BUILD". `autoUpdateSchemaBeforeEachRecipeRun` is
 * set only when `opts.autoUpdateSchema` is truthy and every target
 * resolves to the "DATASET" type.
 *
 * @throws {Error} When `targets` is empty.
 */
function jobBuildDefinition(targets, defaultProjectKey, opts) {
    if (targets.length === 0) {
        throw new Error("At least one build target is required.");
    }
    const { partition, targetType, buildMode, autoUpdateSchema, } = opts ?? {};
    const outputs = targets.map((target) => jobBuildOutput(target, defaultProjectKey, partition, targetType));
    const definition = {
        outputs,
        type: buildMode ?? "NON_RECURSIVE_FORCED_BUILD",
    };
    // Evaluated lazily so the scan only happens when the flag was requested.
    const allTargetsAreDatasets = () => targets.every((target) => (target.type ?? targetType ?? "DATASET") === "DATASET");
    if (autoUpdateSchema && allTargetsAreDatasets()) {
        definition.autoUpdateSchemaBeforeEachRecipeRun = true;
    }
    return definition;
}
|
|
69
|
+
/** Split a log on LF or CRLF and strip trailing whitespace from each line. */
function jobLogLines(log) {
    const lines = [];
    for (const rawLine of log.split(/\r?\n/)) {
        lines.push(rawLine.trimEnd());
    }
    return lines;
}
|
|
72
|
+
/**
 * Decide whether a single log line passes the given log filter.
 *
 * - "stdout": line contains "stdout" (case-insensitive) or starts with ">>> ".
 * - "stderr": line contains "stderr" (case-insensitive).
 * - "errors": line contains one of the whole words error, failed, failure,
 *   exception or traceback (case-insensitive).
 * - "user": line does not start with a `YYYY-MM-DD` / `YYYY/MM/DD` style
 *   date and mentions neither "backend-log" nor "debug".
 * - anything else: the line is kept. Without this fallback an unrecognized
 *   filter returned `undefined`, which made callers drop every line.
 *
 * @param line Log line to test.
 * @param filter Filter name.
 * @returns `true` when the line should be kept.
 */
function lineMatchesLogFilter(line, filter) {
    const normalized = line.toLowerCase();
    switch (filter) {
        case "stdout":
            return normalized.includes("stdout") || line.startsWith(">>> ");
        case "stderr":
            return normalized.includes("stderr");
        case "errors":
            return /\b(error|failed|failure|exception|traceback)\b/i.test(line);
        case "user":
            return !/^\d{4}[-/]\d{2}[-/]\d{2}/.test(line)
                && !normalized.includes("backend-log")
                && !normalized.includes("debug");
        default:
            // Unknown filter: behave like "no filter" instead of silently discarding the log.
            return true;
    }
}
|
|
87
|
+
/**
 * Keep only the log lines accepted by `filter`.
 *
 * A falsy filter returns the log untouched. Otherwise the log is split
 * with `jobLogLines`, tested line by line with `lineMatchesLogFilter`,
 * and the surviving lines are re-joined with "\n".
 */
function filterJobLog(log, filter) {
    if (filter) {
        const kept = [];
        for (const line of jobLogLines(log)) {
            if (lineMatchesLogFilter(line, filter)) {
                kept.push(line);
            }
        }
        return kept.join("\n");
    }
    return log;
}
|
|
92
|
+
/**
 * Trim a log to its last `maxLines` lines.
 *
 * An empty/falsy log yields "". The limit defaults to
 * `DEFAULT_MAX_LOG_LINES`; 0 and -1 both mean "no limit". A single
 * trailing line break is not counted as a line and is re-appended after
 * truncation. When truncation happens, lines are re-joined with "\n".
 */
function limitJobLog(log, maxLines) {
    if (!log) {
        return "";
    }
    const cap = maxLines ?? DEFAULT_MAX_LOG_LINES;
    const unlimited = cap === 0 || cap === -1;
    if (unlimited) {
        return log;
    }
    const pieces = log.split(/\r?\n/);
    const endsWithLineBreak = pieces.length > 0 && pieces[pieces.length - 1] === "";
    const content = endsWithLineBreak ? pieces.slice(0, -1) : pieces;
    if (content.length <= cap) {
        return log;
    }
    // Always keep at least one line, whatever the cap.
    const kept = content.slice(-Math.max(1, cap)).join("\n");
    if (endsWithLineBreak) {
        return `${kept}\n`;
    }
    return kept;
}
|
|
107
|
+
/** Convert a digit string with optional comma separators (e.g. "1,234") to a number. */
function parsedCounterValue(value) {
    const digitsOnly = value.split(",").join("");
    return Number(digitsOnly);
}
|
|
110
|
+
/**
 * Extract progress information from a job log.
 *
 * Every non-blank line is scanned for `<counter> <number>` pairs, where the
 * counter is scanned, matched, joined, written or emitted; later
 * occurrences overwrite earlier ones. A line of the form
 * "<number> rows successfully written" sets the `written` counter and
 * becomes the done line, as does any line containing "done!". The last
 * line that matched anything is reported as `lastProgressLine`.
 *
 * @param log Raw job log text.
 * @param elapsedMs Optional elapsed time; when positive and a
 *   written/emitted count exists, `rowsPerMinute` is derived from it.
 * @returns The progress object, or `undefined` when no line matched.
 */
export function parseJobLogProgress(log, elapsedMs) {
    const counterPattern = /\b(scanned|matched|joined|written|emitted)\s+([0-9][0-9,]*)/gi;
    const rowsWrittenPattern = /\b([0-9][0-9,]*)\s+rows\s+successfully\s+written\b/i;
    const counters = {};
    let lastProgressLine;
    let doneLine;
    for (const rawLine of jobLogLines(log)) {
        const text = rawLine.trim();
        if (text.length === 0) {
            continue;
        }
        const counterHits = [...text.matchAll(counterPattern)];
        for (const [, label, amount] of counterHits) {
            counters[label.toLowerCase()] = parsedCounterValue(amount);
        }
        const rowsWritten = text.match(rowsWrittenPattern);
        if (rowsWritten) {
            counters.written = parsedCounterValue(rowsWritten[1]);
        }
        const saysDone = text.toLowerCase().includes("done!");
        if (rowsWritten || saysDone) {
            doneLine = text;
        }
        if (counterHits.length > 0 || rowsWritten || saysDone) {
            lastProgressLine = text;
        }
    }
    if (lastProgressLine === undefined && doneLine === undefined) {
        return undefined;
    }
    // Prefer the written count; fall back to emitted rows for the throughput estimate.
    const rowCount = counters.written ?? counters.emitted;
    const progress = {};
    if (lastProgressLine) {
        progress.lastProgressLine = lastProgressLine;
    }
    if (doneLine) {
        progress.doneLine = doneLine;
    }
    progress.counters = counters;
    if (rowCount !== undefined && elapsedMs !== undefined && elapsedMs > 0) {
        progress.rowsPerMinute = rowCount / (elapsedMs / 60_000);
    }
    return progress;
}
|
|
150
|
+
/**
 * Build a compact summary of a job log.
 *
 * Lines are trimmed and blank ones dropped; `lineCount` is the number of
 * remaining lines and `lines` holds the last `maxLines` of them (at least
 * one). A `maxLines` of 0 or -1 means "no limit", matching the sentinels
 * `limitJobLog` accepts, so all lines are returned. Previously those
 * values were clamped to a single line.
 *
 * @param state Job state copied into the summary.
 * @param log Job log text (already filtered by the caller if desired).
 * @param maxLines Maximum number of lines to include; 0 / -1 for all.
 * @param elapsedMs Elapsed time forwarded to `parseJobLogProgress`.
 * @returns Summary with `state`, `lineCount`, `lines` and, when the log
 *   contains progress markers, `progress`.
 */
function summarizeJobLog(state, log, maxLines, elapsedMs) {
    const lines = jobLogLines(log).map((line) => line.trim()).filter((line) => line.length > 0);
    const unlimited = maxLines === 0 || maxLines === -1;
    const summaryLines = unlimited ? lines : lines.slice(-Math.max(1, maxLines));
    const progress = parseJobLogProgress(log, elapsedMs);
    return {
        state,
        lineCount: lines.length,
        lines: summaryLines,
        ...(progress ? { progress, } : {}),
    };
}
|
|
38
161
|
export class JobsResource extends BaseResource {
|
|
39
162
|
/** List jobs in a project. */
|
|
40
163
|
async list(projectKey) {
|
|
@@ -55,57 +178,72 @@ export class JobsResource extends BaseResource {
|
|
|
55
178
|
async log(jobId, opts) {
|
|
56
179
|
const jobEnc = encodeURIComponent(jobId);
|
|
57
180
|
const query = opts?.activity ? `?activity=${encodeURIComponent(opts.activity)}` : "";
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
const
|
|
66
|
-
|
|
67
|
-
|
|
181
|
+
// DSS cat-activity-log URLs require a browser session; API-key callers must use the public log endpoint.
|
|
182
|
+
const path = `/public/api/projects/${this.enc(opts?.projectKey)}/jobs/${jobEnc}/log/${query}`;
|
|
183
|
+
const log = await this.client.getText(path);
|
|
184
|
+
return limitJobLog(log, opts?.maxLogLines);
|
|
185
|
+
}
|
|
186
|
+
async logFromUrl(logUrl, opts) {
|
|
187
|
+
const parsed = new URL(logUrl, "http://dss.local");
|
|
188
|
+
const projectKey = parsed.searchParams.get("projectKey") ?? undefined;
|
|
189
|
+
const jobId = parsed.searchParams.get("jobId") ?? undefined;
|
|
190
|
+
const activity = parsed.searchParams.get("activityId") ?? undefined;
|
|
191
|
+
if (!projectKey || !jobId || !activity) {
|
|
192
|
+
throw new Error("Log URL must include projectKey, jobId, and activityId query parameters.");
|
|
68
193
|
}
|
|
69
|
-
return log;
|
|
194
|
+
return this.log(jobId, { activity, projectKey, maxLogLines: opts?.maxLogLines, });
|
|
70
195
|
}
|
|
71
196
|
/**
|
|
72
|
-
* Start a build job for
|
|
197
|
+
* Start a build job for one or more dataset or managed-folder outputs.
|
|
73
198
|
* Returns the new job's ID.
|
|
74
199
|
*/
|
|
75
|
-
async
|
|
200
|
+
async buildOutputs(targets, opts) {
|
|
76
201
|
const pk = this.resolveProjectKey(opts?.projectKey);
|
|
77
202
|
const enc = encodeURIComponent(pk);
|
|
78
|
-
const
|
|
79
|
-
const jobDef = {
|
|
80
|
-
outputs: [{ projectKey: pk, id: targetId, type: targetType, },],
|
|
81
|
-
type: opts?.buildMode ?? "NON_RECURSIVE_FORCED_BUILD",
|
|
82
|
-
};
|
|
83
|
-
if (opts?.autoUpdateSchema && targetType === "DATASET") {
|
|
84
|
-
jobDef.autoUpdateSchemaBeforeEachRecipeRun = true;
|
|
85
|
-
}
|
|
203
|
+
const jobDef = jobBuildDefinition(targets, pk, opts);
|
|
86
204
|
const job = await this.client.post(`/public/api/projects/${enc}/jobs/`, jobDef);
|
|
87
205
|
return { jobId: job.id, };
|
|
88
206
|
}
|
|
89
207
|
/**
|
|
90
|
-
*
|
|
91
|
-
*
|
|
208
|
+
* Start a build job for a single dataset or managed folder.
|
|
209
|
+
* Returns the new job's ID.
|
|
92
210
|
*/
|
|
93
|
-
async
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
211
|
+
async build(targetId, opts) {
|
|
212
|
+
return this.buildOutputs([{
|
|
213
|
+
id: targetId,
|
|
214
|
+
type: opts?.targetType,
|
|
215
|
+
partition: opts?.partition,
|
|
216
|
+
},], opts);
|
|
217
|
+
}
|
|
218
|
+
/**
|
|
219
|
+
* Build one or more dataset or managed-folder outputs and wait for a terminal state.
|
|
220
|
+
* Combines {@link buildOutputs} then {@link wait}.
|
|
221
|
+
*/
|
|
222
|
+
async buildAndWaitOutputs(targets, opts) {
|
|
223
|
+
const { jobId, } = await this.buildOutputs(targets, opts);
|
|
100
224
|
return this.wait(jobId, {
|
|
101
225
|
activity: opts?.activity,
|
|
102
226
|
includeLogs: opts?.includeLogs,
|
|
227
|
+
logFilter: opts?.logFilter,
|
|
228
|
+
logId: opts?.logId,
|
|
103
229
|
maxLogLines: opts?.maxLogLines,
|
|
104
230
|
pollIntervalMs: opts?.pollIntervalMs,
|
|
231
|
+
summary: opts?.summary,
|
|
105
232
|
timeoutMs: opts?.timeoutMs,
|
|
106
233
|
projectKey: opts?.projectKey,
|
|
107
234
|
});
|
|
108
235
|
}
|
|
236
|
+
/**
|
|
237
|
+
* Build a dataset or managed folder and wait for the job to reach a terminal state.
|
|
238
|
+
* Combines {@link build} then {@link wait}.
|
|
239
|
+
*/
|
|
240
|
+
async buildAndWait(targetId, opts) {
|
|
241
|
+
return this.buildAndWaitOutputs([{
|
|
242
|
+
id: targetId,
|
|
243
|
+
type: opts?.targetType,
|
|
244
|
+
partition: opts?.partition,
|
|
245
|
+
},], opts);
|
|
246
|
+
}
|
|
109
247
|
/**
|
|
110
248
|
* Poll a job until it reaches a terminal state or times out.
|
|
111
249
|
*
|
|
@@ -133,12 +271,20 @@ export class JobsResource extends BaseResource {
|
|
|
133
271
|
if (isTerminalState(state)) {
|
|
134
272
|
const success = isSuccessfulTerminalState(state);
|
|
135
273
|
let log;
|
|
136
|
-
|
|
137
|
-
|
|
274
|
+
let logSummary;
|
|
275
|
+
if (opts?.includeLogs || opts?.summary) {
|
|
276
|
+
const rawLog = await this.log(jobId, {
|
|
138
277
|
activity: opts.activity,
|
|
139
|
-
maxLogLines: opts.maxLogLines,
|
|
278
|
+
maxLogLines: opts.summary ? 0 : opts.maxLogLines,
|
|
279
|
+
logId: opts.logId,
|
|
140
280
|
projectKey: opts.projectKey,
|
|
141
281
|
});
|
|
282
|
+
const filteredLog = filterJobLog(rawLog, opts.logFilter);
|
|
283
|
+
if (opts.includeLogs)
|
|
284
|
+
log = limitJobLog(filteredLog, opts.maxLogLines);
|
|
285
|
+
if (opts.summary) {
|
|
286
|
+
logSummary = summarizeJobLog(state, filteredLog, opts.maxLogLines ?? 20, elapsedMs);
|
|
287
|
+
}
|
|
142
288
|
}
|
|
143
289
|
return {
|
|
144
290
|
success,
|
|
@@ -154,6 +300,7 @@ export class JobsResource extends BaseResource {
|
|
|
154
300
|
total: gs.total ?? null,
|
|
155
301
|
},
|
|
156
302
|
...(log !== undefined ? { log, } : {}),
|
|
303
|
+
...(logSummary !== undefined ? { logSummary, } : {}),
|
|
157
304
|
};
|
|
158
305
|
}
|
|
159
306
|
// Timeout — return failure result, don't throw
|