@ken-e/dataform-youtube 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/includes/column_descriptions.js +137 -95
- package/includes/definitions/sources/stg_ytc_annotation.js +16 -31
- package/includes/definitions/sources/stg_ytc_basic.js +15 -31
- package/includes/definitions/sources/stg_ytc_cards.js +16 -31
- package/includes/definitions/sources/stg_ytc_combined.js +15 -31
- package/includes/definitions/sources/stg_ytc_demographics.js +16 -31
- package/includes/definitions/sources/stg_ytc_device_os.js +16 -31
- package/includes/definitions/sources/stg_ytc_end_screens.js +14 -29
- package/includes/definitions/sources/stg_ytc_list_basic.js +15 -31
- package/includes/definitions/sources/stg_ytc_list_combined.js +16 -30
- package/includes/definitions/sources/stg_ytc_list_device_os.js +16 -30
- package/includes/definitions/sources/stg_ytc_list_playback.js +16 -30
- package/includes/definitions/sources/stg_ytc_list_province.js +15 -31
- package/includes/definitions/sources/stg_ytc_list_traffic_source.js +16 -30
- package/includes/definitions/sources/stg_ytc_lu_operating_systems.js +1 -1
- package/includes/definitions/sources/stg_ytc_playback.js +16 -31
- package/includes/definitions/sources/stg_ytc_province.js +15 -31
- package/includes/definitions/sources/stg_ytc_share_platform.js +16 -31
- package/includes/definitions/sources/stg_ytc_subtitles.js +15 -30
- package/includes/definitions/sources/stg_ytc_traffic_source.js +16 -49
- package/includes/definitions/ytc_annotation.js +14 -29
- package/includes/definitions/ytc_basic.js +14 -30
- package/includes/definitions/ytc_cards.js +14 -29
- package/includes/definitions/ytc_combined.js +14 -31
- package/includes/definitions/ytc_demographics.js +14 -28
- package/includes/definitions/ytc_demographics_views.js +15 -29
- package/includes/definitions/ytc_device_os.js +14 -28
- package/includes/definitions/ytc_end_screens.js +14 -28
- package/includes/definitions/ytc_list_basic.js +14 -28
- package/includes/definitions/ytc_list_combined.js +14 -27
- package/includes/definitions/ytc_list_device_os.js +14 -27
- package/includes/definitions/ytc_list_playback.js +14 -27
- package/includes/definitions/ytc_list_province.js +14 -28
- package/includes/definitions/ytc_list_traffic_source.js +14 -27
- package/includes/definitions/ytc_playback.js +14 -28
- package/includes/definitions/ytc_province.js +14 -28
- package/includes/definitions/ytc_share_platform.js +14 -28
- package/includes/definitions/ytc_subtitles.js +14 -27
- package/includes/definitions/ytc_traffic_source.js +14 -28
- package/includes/helpers.js +12 -0
- package/index.js +12 -2
- package/package.json +3 -3
|
@@ -3,24 +3,25 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
const column_descriptions = require("../../column_descriptions");
|
|
6
|
+
const helpers = require("../../helpers");
|
|
6
7
|
|
|
7
8
|
module.exports = (config) => {
|
|
8
9
|
// eslint-disable-next-line no-undef
|
|
9
10
|
return publish("stg_ytc_list_province", {
|
|
10
11
|
type: "incremental",
|
|
11
12
|
database: config.target.database,
|
|
12
|
-
schema: config.datasetStaging,
|
|
13
|
+
schema: config.target.datasetStaging,
|
|
13
14
|
protected: config.protected,
|
|
14
15
|
tags: ["youtube", "source", "staging", "daily"],
|
|
15
16
|
bigquery: {
|
|
16
|
-
partitionBy: "
|
|
17
|
+
partitionBy: "data_date",
|
|
17
18
|
clusterBy: ["playlist_id", "video_id"],
|
|
18
19
|
},
|
|
19
20
|
assertions: {
|
|
20
21
|
// make sure rows have unique dimensions
|
|
21
22
|
uniqueKeys: [
|
|
22
23
|
[
|
|
23
|
-
"
|
|
24
|
+
"data_date",
|
|
24
25
|
"channel_id",
|
|
25
26
|
"playlist_id",
|
|
26
27
|
"video_id",
|
|
@@ -30,43 +31,22 @@ module.exports = (config) => {
|
|
|
30
31
|
],
|
|
31
32
|
],
|
|
32
33
|
// make sure source partition and data dates match
|
|
33
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
34
35
|
},
|
|
35
36
|
columns: column_descriptions.column_descriptions,
|
|
36
37
|
description: "YT Channel Playlist Province Report Table - Staging",
|
|
37
38
|
})
|
|
38
|
-
.preOps(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
);
|
|
43
|
-
|
|
44
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
45
|
-
set source_date_checkpoint = (
|
|
46
|
-
${ctx.when(
|
|
47
|
-
ctx.incremental(),
|
|
48
|
-
`select
|
|
49
|
-
least(
|
|
50
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
51
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
52
|
-
)`,
|
|
53
|
-
`select date("${config.startDate}")`,
|
|
54
|
-
)}
|
|
55
|
-
);
|
|
56
|
-
|
|
57
|
-
${ctx.when(
|
|
58
|
-
ctx.incremental(),
|
|
59
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
60
|
-
)}
|
|
61
|
-
`,
|
|
62
|
-
)
|
|
39
|
+
.preOps((ctx) => {
|
|
40
|
+
// Get pre_operations to find updated source partitions
|
|
41
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_list_province")}`;
|
|
42
|
+
})
|
|
63
43
|
.query((ctx) =>
|
|
64
44
|
config.sources
|
|
65
45
|
.map((t) => {
|
|
66
46
|
return `
|
|
67
47
|
select
|
|
68
48
|
_PARTITIONDATE as source_partition_date,
|
|
69
|
-
parse_date('%Y%m%d',date) as
|
|
49
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
70
50
|
"${t.schema}" as site_nm,
|
|
71
51
|
current_timestamp() as updated_at,
|
|
72
52
|
channel_id,
|
|
@@ -83,7 +63,11 @@ select
|
|
|
83
63
|
playlist_saves_added,
|
|
84
64
|
playlist_saves_removed
|
|
85
65
|
from ${ctx.ref(t.database, t.schema, "p_playlist_province_a1_" + t.suffix)}
|
|
86
|
-
|
|
66
|
+
${ctx.when(
|
|
67
|
+
ctx.incremental(),
|
|
68
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
69
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
70
|
+
)}
|
|
87
71
|
`;
|
|
88
72
|
})
|
|
89
73
|
.join(" union all "),
|
|
@@ -2,22 +2,24 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_list_traffic_source", {
|
|
8
10
|
type: "incremental",
|
|
9
11
|
database: config.target.database,
|
|
10
|
-
schema: config.datasetStaging,
|
|
12
|
+
schema: config.target.datasetStaging,
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["playlist_id", "video_id", "traffic_source_type"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
uniqueKeys: [
|
|
19
21
|
[
|
|
20
|
-
"
|
|
22
|
+
"data_date",
|
|
21
23
|
"channel_id",
|
|
22
24
|
"playlist_id",
|
|
23
25
|
"video_id",
|
|
@@ -28,42 +30,22 @@ module.exports = (config) => {
|
|
|
28
30
|
"traffic_source_detail",
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
33
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
32
34
|
},
|
|
33
35
|
description:
|
|
34
36
|
"Staging table for YouTube Channel Playlist Traffic Source data",
|
|
35
37
|
})
|
|
36
|
-
.preOps(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
set source_date_checkpoint = (
|
|
43
|
-
${ctx.when(
|
|
44
|
-
ctx.incremental(),
|
|
45
|
-
`select
|
|
46
|
-
least(
|
|
47
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
48
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
49
|
-
)`,
|
|
50
|
-
`select date("${config.startDate}")`,
|
|
51
|
-
)}
|
|
52
|
-
);
|
|
53
|
-
|
|
54
|
-
${ctx.when(
|
|
55
|
-
ctx.incremental(),
|
|
56
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
57
|
-
)}
|
|
58
|
-
`,
|
|
59
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_list_traffic_source")}`;
|
|
41
|
+
})
|
|
60
42
|
.query((ctx) =>
|
|
61
43
|
config.sources
|
|
62
44
|
.map((t) => {
|
|
63
45
|
return `
|
|
64
46
|
select
|
|
65
47
|
_PARTITIONDATE as source_partition_date,
|
|
66
|
-
parse_date('%Y%m%d', date) as
|
|
48
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
67
49
|
"${t.schema}" as site_nm,
|
|
68
50
|
current_timestamp() as updated_at,
|
|
69
51
|
channel_id,
|
|
@@ -81,7 +63,11 @@ select
|
|
|
81
63
|
playlist_saves_added,
|
|
82
64
|
playlist_saves_removed
|
|
83
65
|
from ${ctx.ref(t.database, t.schema, "p_playlist_traffic_source_a1_" + t.suffix)}
|
|
84
|
-
|
|
66
|
+
${ctx.when(
|
|
67
|
+
ctx.incremental(),
|
|
68
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
69
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
70
|
+
)}
|
|
85
71
|
`;
|
|
86
72
|
})
|
|
87
73
|
.join(" union all "),
|
|
@@ -2,23 +2,25 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_playback", {
|
|
8
10
|
type: "incremental",
|
|
9
11
|
database: config.target.database,
|
|
10
|
-
schema: config.datasetStaging,
|
|
12
|
+
schema: config.target.datasetStaging,
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["video_id", "playback_location_type"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
// make sure rows have unique dimensions
|
|
19
21
|
uniqueKeys: [
|
|
20
22
|
[
|
|
21
|
-
"
|
|
23
|
+
"data_date",
|
|
22
24
|
"channel_id",
|
|
23
25
|
"video_id",
|
|
24
26
|
"live_or_on_demand",
|
|
@@ -29,35 +31,14 @@ module.exports = (config) => {
|
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
33
|
// make sure source partition and data dates match
|
|
32
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
33
35
|
},
|
|
34
36
|
description: "Staging table for YouTube Channel Playback Location data",
|
|
35
37
|
})
|
|
36
|
-
.preOps(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
43
|
-
set source_date_checkpoint = (
|
|
44
|
-
${ctx.when(
|
|
45
|
-
ctx.incremental(),
|
|
46
|
-
`select
|
|
47
|
-
least(
|
|
48
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
49
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
50
|
-
)`,
|
|
51
|
-
`select date("${config.startDate}")`,
|
|
52
|
-
)}
|
|
53
|
-
);
|
|
54
|
-
|
|
55
|
-
${ctx.when(
|
|
56
|
-
ctx.incremental(),
|
|
57
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
58
|
-
)}
|
|
59
|
-
`,
|
|
60
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_playback")}`;
|
|
41
|
+
})
|
|
61
42
|
.query((ctx) =>
|
|
62
43
|
config.sources
|
|
63
44
|
.map((t) => {
|
|
@@ -65,7 +46,7 @@ declare source_date_checkpoint default (
|
|
|
65
46
|
|
|
66
47
|
select
|
|
67
48
|
_PARTITIONDATE as source_partition_date,
|
|
68
|
-
parse_date('%Y%m%d', date) as
|
|
49
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
69
50
|
"${t.schema}" as site_nm,
|
|
70
51
|
current_timestamp() as updated_at,
|
|
71
52
|
channel_id,
|
|
@@ -82,7 +63,11 @@ select
|
|
|
82
63
|
red_views,
|
|
83
64
|
red_watch_time_minutes
|
|
84
65
|
from ${ctx.ref(t.database, t.schema, "p_channel_playback_location_a2_" + t.suffix)}
|
|
85
|
-
|
|
66
|
+
${ctx.when(
|
|
67
|
+
ctx.incremental(),
|
|
68
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
69
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
70
|
+
)}
|
|
86
71
|
`;
|
|
87
72
|
})
|
|
88
73
|
.join(" union all "),
|
|
@@ -3,24 +3,25 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
const column_descriptions = require("../../column_descriptions");
|
|
6
|
+
const helpers = require("../../helpers");
|
|
6
7
|
|
|
7
8
|
module.exports = (config) => {
|
|
8
9
|
// eslint-disable-next-line no-undef
|
|
9
10
|
return publish("stg_ytc_province", {
|
|
10
11
|
type: "incremental",
|
|
11
12
|
database: config.target.database,
|
|
12
|
-
schema: config.datasetStaging,
|
|
13
|
+
schema: config.target.datasetStaging,
|
|
13
14
|
protected: config.protected,
|
|
14
15
|
tags: ["youtube", "source", "staging", "daily"],
|
|
15
16
|
bigquery: {
|
|
16
|
-
partitionBy: "
|
|
17
|
+
partitionBy: "data_date",
|
|
17
18
|
clusterBy: ["video_id"],
|
|
18
19
|
},
|
|
19
20
|
assertions: {
|
|
20
21
|
// make sure rows have unique dimensions
|
|
21
22
|
uniqueKeys: [
|
|
22
23
|
[
|
|
23
|
-
"
|
|
24
|
+
"data_date",
|
|
24
25
|
"channel_id",
|
|
25
26
|
"video_id",
|
|
26
27
|
"live_or_on_demand",
|
|
@@ -30,43 +31,22 @@ module.exports = (config) => {
|
|
|
30
31
|
],
|
|
31
32
|
],
|
|
32
33
|
// make sure source partition and data dates match
|
|
33
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
34
35
|
},
|
|
35
36
|
columns: column_descriptions.column_descriptions,
|
|
36
37
|
description: "YT Channel Province Report Table - Staging",
|
|
37
38
|
})
|
|
38
|
-
.preOps(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
);
|
|
43
|
-
|
|
44
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
45
|
-
set source_date_checkpoint = (
|
|
46
|
-
${ctx.when(
|
|
47
|
-
ctx.incremental(),
|
|
48
|
-
`select
|
|
49
|
-
least(
|
|
50
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
51
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
52
|
-
)`,
|
|
53
|
-
`select date("${config.startDate}")`,
|
|
54
|
-
)}
|
|
55
|
-
);
|
|
56
|
-
|
|
57
|
-
${ctx.when(
|
|
58
|
-
ctx.incremental(),
|
|
59
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
60
|
-
)}
|
|
61
|
-
`,
|
|
62
|
-
)
|
|
39
|
+
.preOps((ctx) => {
|
|
40
|
+
// Get pre_operations to find updated source partitions
|
|
41
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_province")}`;
|
|
42
|
+
})
|
|
63
43
|
.query((ctx) =>
|
|
64
44
|
config.sources
|
|
65
45
|
.map((t) => {
|
|
66
46
|
return `
|
|
67
47
|
select
|
|
68
48
|
_PARTITIONDATE as source_partition_date,
|
|
69
|
-
parse_date('%Y%m%d',date) as
|
|
49
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
70
50
|
"${t.schema}" as site_nm,
|
|
71
51
|
current_timestamp() as updated_at,
|
|
72
52
|
channel_id,
|
|
@@ -95,7 +75,11 @@ select
|
|
|
95
75
|
red_views,
|
|
96
76
|
red_watch_time_minutes
|
|
97
77
|
from ${ctx.ref(t.database, t.schema, "p_channel_province_a2_" + t.suffix)}
|
|
98
|
-
|
|
78
|
+
${ctx.when(
|
|
79
|
+
ctx.incremental(),
|
|
80
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
81
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
82
|
+
)}
|
|
99
83
|
`;
|
|
100
84
|
})
|
|
101
85
|
.join(" union all "),
|
|
@@ -2,23 +2,25 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_share_platform", {
|
|
8
10
|
type: "incremental",
|
|
9
11
|
database: config.target.database,
|
|
10
|
-
schema: config.datasetStaging,
|
|
12
|
+
schema: config.target.datasetStaging,
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["video_id", "sharing_service"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
// make sure rows have unique dimensions
|
|
19
21
|
uniqueKeys: [
|
|
20
22
|
[
|
|
21
|
-
"
|
|
23
|
+
"data_date",
|
|
22
24
|
"channel_id",
|
|
23
25
|
"video_id",
|
|
24
26
|
"live_or_on_demand",
|
|
@@ -28,42 +30,21 @@ module.exports = (config) => {
|
|
|
28
30
|
],
|
|
29
31
|
],
|
|
30
32
|
// make sure source partition and data dates match
|
|
31
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
33
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
32
34
|
},
|
|
33
35
|
description: "Staging table for YouTube Channel Share Platform data",
|
|
34
36
|
})
|
|
35
|
-
.preOps(
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
);
|
|
40
|
-
|
|
41
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
42
|
-
set source_date_checkpoint = (
|
|
43
|
-
${ctx.when(
|
|
44
|
-
ctx.incremental(),
|
|
45
|
-
`select
|
|
46
|
-
least(
|
|
47
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
48
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
49
|
-
)`,
|
|
50
|
-
`select date("${config.startDate}")`,
|
|
51
|
-
)}
|
|
52
|
-
);
|
|
53
|
-
|
|
54
|
-
${ctx.when(
|
|
55
|
-
ctx.incremental(),
|
|
56
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
57
|
-
)}
|
|
58
|
-
`,
|
|
59
|
-
)
|
|
37
|
+
.preOps((ctx) => {
|
|
38
|
+
// Get pre_operations to find updated source partitions
|
|
39
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_share_platform")}`;
|
|
40
|
+
})
|
|
60
41
|
.query((ctx) =>
|
|
61
42
|
config.sources
|
|
62
43
|
.map((t) => {
|
|
63
44
|
return `
|
|
64
45
|
select
|
|
65
46
|
_PARTITIONDATE as source_partition_date,
|
|
66
|
-
parse_date('%Y%m%d', date) as
|
|
47
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
67
48
|
"${t.schema}" as site_nm,
|
|
68
49
|
current_timestamp() as updated_at,
|
|
69
50
|
channel_id,
|
|
@@ -74,7 +55,11 @@ select
|
|
|
74
55
|
cast(sharing_service as int) as sharing_service,
|
|
75
56
|
shares
|
|
76
57
|
from ${ctx.ref(t.database, t.schema, "p_channel_sharing_service_a1_" + t.suffix)}
|
|
77
|
-
|
|
58
|
+
${ctx.when(
|
|
59
|
+
ctx.incremental(),
|
|
60
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
61
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
62
|
+
)}
|
|
78
63
|
`;
|
|
79
64
|
})
|
|
80
65
|
.join(" union all "),
|
|
@@ -3,23 +3,24 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
const column_descriptions = require("../../column_descriptions");
|
|
6
|
+
const helpers = require("../../helpers");
|
|
6
7
|
|
|
7
8
|
module.exports = (config) => {
|
|
8
9
|
// eslint-disable-next-line no-undef
|
|
9
10
|
return publish("stg_ytc_subtitles", {
|
|
10
11
|
type: "incremental",
|
|
11
12
|
database: config.target.database,
|
|
12
|
-
schema: config.datasetStaging,
|
|
13
|
+
schema: config.target.datasetStaging,
|
|
13
14
|
protected: config.protected,
|
|
14
15
|
tags: ["youtube", "source", "staging", "daily"],
|
|
15
16
|
bigquery: {
|
|
16
|
-
partitionBy: "
|
|
17
|
+
partitionBy: "data_date",
|
|
17
18
|
clusterBy: ["video_id"],
|
|
18
19
|
},
|
|
19
20
|
assertions: {
|
|
20
21
|
uniqueKeys: [
|
|
21
22
|
[
|
|
22
|
-
"
|
|
23
|
+
"data_date",
|
|
23
24
|
"channel_id",
|
|
24
25
|
"video_id",
|
|
25
26
|
"live_or_on_demand",
|
|
@@ -29,42 +30,22 @@ module.exports = (config) => {
|
|
|
29
30
|
"subtitle_language_autotranslated",
|
|
30
31
|
],
|
|
31
32
|
],
|
|
32
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
33
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
33
34
|
},
|
|
34
35
|
columns: column_descriptions.column_descriptions,
|
|
35
36
|
description: "YT Channel Subtitles Report Table - Staging",
|
|
36
37
|
})
|
|
37
|
-
.preOps(
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
);
|
|
42
|
-
|
|
43
|
-
set source_date_checkpoint = (
|
|
44
|
-
${ctx.when(
|
|
45
|
-
ctx.incremental(),
|
|
46
|
-
`select
|
|
47
|
-
least(
|
|
48
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
49
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
50
|
-
)`,
|
|
51
|
-
`select date("${config.startDate}")`,
|
|
52
|
-
)}
|
|
53
|
-
);
|
|
54
|
-
|
|
55
|
-
${ctx.when(
|
|
56
|
-
ctx.incremental(),
|
|
57
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
58
|
-
)}
|
|
59
|
-
`,
|
|
60
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_subtitles")}`;
|
|
41
|
+
})
|
|
61
42
|
.query((ctx) =>
|
|
62
43
|
config.sources
|
|
63
44
|
.map((t) => {
|
|
64
45
|
return `
|
|
65
46
|
select
|
|
66
47
|
_PARTITIONDATE as source_partition_date,
|
|
67
|
-
parse_date('%Y%m%d',date) as
|
|
48
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
68
49
|
"${t.schema}" as site_nm,
|
|
69
50
|
current_timestamp() as updated_at,
|
|
70
51
|
channel_id,
|
|
@@ -81,7 +62,11 @@ select
|
|
|
81
62
|
red_views,
|
|
82
63
|
red_watch_time_minutes
|
|
83
64
|
from ${ctx.ref(t.database, t.schema, "p_channel_subtitles_a2_" + t.suffix)}
|
|
84
|
-
|
|
65
|
+
${ctx.when(
|
|
66
|
+
ctx.incremental(),
|
|
67
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
68
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
69
|
+
)}
|
|
85
70
|
`;
|
|
86
71
|
})
|
|
87
72
|
.join(" union all "),
|
|
@@ -2,23 +2,25 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_traffic_source", {
|
|
8
10
|
type: "incremental",
|
|
9
11
|
database: config.target.database,
|
|
10
|
-
schema: config.datasetStaging,
|
|
12
|
+
schema: config.target.datasetStaging,
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["video_id", "traffic_source_type"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
// make sure rows have unique dimensions
|
|
19
21
|
uniqueKeys: [
|
|
20
22
|
[
|
|
21
|
-
"
|
|
23
|
+
"data_date",
|
|
22
24
|
"channel_id",
|
|
23
25
|
"video_id",
|
|
24
26
|
"live_or_on_demand",
|
|
@@ -29,60 +31,21 @@ module.exports = (config) => {
|
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
33
|
// make sure source partition and data dates match
|
|
32
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
33
|
-
},
|
|
34
|
-
description: "Staging table for YouTube Channel Traffic Source data",
|
|
35
|
-
assertions: {
|
|
36
|
-
// make sure rows have unique dimensions
|
|
37
|
-
uniqueKeys: [
|
|
38
|
-
[
|
|
39
|
-
"interaction_date",
|
|
40
|
-
"channel_id",
|
|
41
|
-
"video_id",
|
|
42
|
-
"live_or_on_demand",
|
|
43
|
-
"subscribed_status",
|
|
44
|
-
"country_code",
|
|
45
|
-
"traffic_source_type",
|
|
46
|
-
"traffic_source_detail",
|
|
47
|
-
],
|
|
48
|
-
],
|
|
49
|
-
// make sure source partition and data dates match
|
|
50
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
51
35
|
},
|
|
52
36
|
description: "Staging table for YouTube Channel Traffic Source data",
|
|
53
37
|
})
|
|
54
|
-
.preOps(
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
);
|
|
59
|
-
|
|
60
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
61
|
-
set source_date_checkpoint = (
|
|
62
|
-
${ctx.when(
|
|
63
|
-
ctx.incremental(),
|
|
64
|
-
`select
|
|
65
|
-
least(
|
|
66
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
67
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
68
|
-
)`,
|
|
69
|
-
`select date("${config.startDate}")`,
|
|
70
|
-
)}
|
|
71
|
-
);
|
|
72
|
-
|
|
73
|
-
${ctx.when(
|
|
74
|
-
ctx.incremental(),
|
|
75
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
76
|
-
)}
|
|
77
|
-
`,
|
|
78
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_traffic_source")}`;
|
|
41
|
+
})
|
|
79
42
|
.query((ctx) =>
|
|
80
43
|
config.sources
|
|
81
44
|
.map((t) => {
|
|
82
45
|
return `
|
|
83
46
|
select
|
|
84
47
|
_PARTITIONDATE as source_partition_date,
|
|
85
|
-
parse_date('%Y%m%d', date) as
|
|
48
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
86
49
|
"${t.schema}" as site_nm,
|
|
87
50
|
current_timestamp() as updated_at,
|
|
88
51
|
channel_id,
|
|
@@ -99,7 +62,11 @@ select
|
|
|
99
62
|
red_views,
|
|
100
63
|
red_watch_time_minutes
|
|
101
64
|
from ${ctx.ref(t.database, t.schema, "p_channel_traffic_source_a2_" + t.suffix)}
|
|
102
|
-
|
|
65
|
+
${ctx.when(
|
|
66
|
+
ctx.incremental(),
|
|
67
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
68
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
69
|
+
)}
|
|
103
70
|
`;
|
|
104
71
|
})
|
|
105
72
|
.join(" union all "),
|