@ken-e/dataform-youtube 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/includes/column_descriptions.js +137 -95
- package/includes/definitions/sources/stg_ytc_annotation.js +15 -30
- package/includes/definitions/sources/stg_ytc_basic.js +14 -30
- package/includes/definitions/sources/stg_ytc_cards.js +15 -30
- package/includes/definitions/sources/stg_ytc_combined.js +14 -30
- package/includes/definitions/sources/stg_ytc_demographics.js +15 -30
- package/includes/definitions/sources/stg_ytc_device_os.js +15 -30
- package/includes/definitions/sources/stg_ytc_end_screens.js +13 -28
- package/includes/definitions/sources/stg_ytc_list_basic.js +14 -30
- package/includes/definitions/sources/stg_ytc_list_combined.js +15 -29
- package/includes/definitions/sources/stg_ytc_list_device_os.js +15 -29
- package/includes/definitions/sources/stg_ytc_list_playback.js +15 -29
- package/includes/definitions/sources/stg_ytc_list_province.js +14 -30
- package/includes/definitions/sources/stg_ytc_list_traffic_source.js +15 -29
- package/includes/definitions/sources/stg_ytc_lu_operating_systems.js +1 -1
- package/includes/definitions/sources/stg_ytc_playback.js +15 -30
- package/includes/definitions/sources/stg_ytc_province.js +14 -30
- package/includes/definitions/sources/stg_ytc_share_platform.js +15 -30
- package/includes/definitions/sources/stg_ytc_subtitles.js +14 -29
- package/includes/definitions/sources/stg_ytc_traffic_source.js +15 -48
- package/includes/definitions/ytc_annotation.js +14 -29
- package/includes/definitions/ytc_basic.js +14 -30
- package/includes/definitions/ytc_cards.js +14 -29
- package/includes/definitions/ytc_combined.js +14 -31
- package/includes/definitions/ytc_demographics.js +14 -28
- package/includes/definitions/ytc_demographics_views.js +15 -29
- package/includes/definitions/ytc_device_os.js +14 -28
- package/includes/definitions/ytc_end_screens.js +14 -28
- package/includes/definitions/ytc_list_basic.js +14 -28
- package/includes/definitions/ytc_list_combined.js +14 -27
- package/includes/definitions/ytc_list_device_os.js +14 -27
- package/includes/definitions/ytc_list_playback.js +14 -27
- package/includes/definitions/ytc_list_province.js +14 -28
- package/includes/definitions/ytc_list_traffic_source.js +14 -27
- package/includes/definitions/ytc_playback.js +14 -28
- package/includes/definitions/ytc_province.js +14 -28
- package/includes/definitions/ytc_share_platform.js +14 -28
- package/includes/definitions/ytc_subtitles.js +14 -27
- package/includes/definitions/ytc_traffic_source.js +14 -28
- package/includes/helpers.js +12 -0
- package/index.js +12 -2
- package/package.json +3 -3
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_list_traffic_source", {
|
|
@@ -11,13 +13,13 @@ module.exports = (config) => {
|
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["playlist_id", "video_id", "traffic_source_type"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
uniqueKeys: [
|
|
19
21
|
[
|
|
20
|
-
"
|
|
22
|
+
"data_date",
|
|
21
23
|
"channel_id",
|
|
22
24
|
"playlist_id",
|
|
23
25
|
"video_id",
|
|
@@ -28,42 +30,22 @@ module.exports = (config) => {
|
|
|
28
30
|
"traffic_source_detail",
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
33
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
32
34
|
},
|
|
33
35
|
description:
|
|
34
36
|
"Staging table for YouTube Channel Playlist Traffic Source data",
|
|
35
37
|
})
|
|
36
|
-
.preOps(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
set source_date_checkpoint = (
|
|
43
|
-
${ctx.when(
|
|
44
|
-
ctx.incremental(),
|
|
45
|
-
`select
|
|
46
|
-
least(
|
|
47
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
48
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
49
|
-
)`,
|
|
50
|
-
`select date("${config.startDate}")`,
|
|
51
|
-
)}
|
|
52
|
-
);
|
|
53
|
-
|
|
54
|
-
${ctx.when(
|
|
55
|
-
ctx.incremental(),
|
|
56
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
57
|
-
)}
|
|
58
|
-
`,
|
|
59
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_list_traffic_source")}`;
|
|
41
|
+
})
|
|
60
42
|
.query((ctx) =>
|
|
61
43
|
config.sources
|
|
62
44
|
.map((t) => {
|
|
63
45
|
return `
|
|
64
46
|
select
|
|
65
47
|
_PARTITIONDATE as source_partition_date,
|
|
66
|
-
parse_date('%Y%m%d', date) as interaction_date,
|
|
48
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
67
49
|
"${t.schema}" as site_nm,
|
|
68
50
|
current_timestamp() as updated_at,
|
|
69
51
|
channel_id,
|
|
@@ -81,7 +63,11 @@ select
|
|
|
81
63
|
playlist_saves_added,
|
|
82
64
|
playlist_saves_removed
|
|
83
65
|
from ${ctx.ref(t.database, t.schema, "p_playlist_traffic_source_a1_" + t.suffix)}
|
|
84
|
-
|
|
66
|
+
${ctx.when(
|
|
67
|
+
ctx.incremental(),
|
|
68
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
69
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
70
|
+
)}
|
|
85
71
|
`;
|
|
86
72
|
})
|
|
87
73
|
.join(" union all "),
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_playback", {
|
|
@@ -11,14 +13,14 @@ module.exports = (config) => {
|
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["video_id", "playback_location_type"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
// make sure rows have unique dimensions
|
|
19
21
|
uniqueKeys: [
|
|
20
22
|
[
|
|
21
|
-
"
|
|
23
|
+
"data_date",
|
|
22
24
|
"channel_id",
|
|
23
25
|
"video_id",
|
|
24
26
|
"live_or_on_demand",
|
|
@@ -29,35 +31,14 @@ module.exports = (config) => {
|
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
33
|
// make sure source partition and data dates match
|
|
32
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
33
35
|
},
|
|
34
36
|
description: "Staging table for YouTube Channel Playback Location data",
|
|
35
37
|
})
|
|
36
|
-
.preOps(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
43
|
-
set source_date_checkpoint = (
|
|
44
|
-
${ctx.when(
|
|
45
|
-
ctx.incremental(),
|
|
46
|
-
`select
|
|
47
|
-
least(
|
|
48
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
49
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
50
|
-
)`,
|
|
51
|
-
`select date("${config.startDate}")`,
|
|
52
|
-
)}
|
|
53
|
-
);
|
|
54
|
-
|
|
55
|
-
${ctx.when(
|
|
56
|
-
ctx.incremental(),
|
|
57
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
58
|
-
)}
|
|
59
|
-
`,
|
|
60
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_playback")}`;
|
|
41
|
+
})
|
|
61
42
|
.query((ctx) =>
|
|
62
43
|
config.sources
|
|
63
44
|
.map((t) => {
|
|
@@ -65,7 +46,7 @@ declare source_date_checkpoint default (
|
|
|
65
46
|
|
|
66
47
|
select
|
|
67
48
|
_PARTITIONDATE as source_partition_date,
|
|
68
|
-
parse_date('%Y%m%d', date) as interaction_date,
|
|
49
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
69
50
|
"${t.schema}" as site_nm,
|
|
70
51
|
current_timestamp() as updated_at,
|
|
71
52
|
channel_id,
|
|
@@ -82,7 +63,11 @@ select
|
|
|
82
63
|
red_views,
|
|
83
64
|
red_watch_time_minutes
|
|
84
65
|
from ${ctx.ref(t.database, t.schema, "p_channel_playback_location_a2_" + t.suffix)}
|
|
85
|
-
|
|
66
|
+
${ctx.when(
|
|
67
|
+
ctx.incremental(),
|
|
68
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
69
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
70
|
+
)}
|
|
86
71
|
`;
|
|
87
72
|
})
|
|
88
73
|
.join(" union all "),
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
const column_descriptions = require("../../column_descriptions");
|
|
6
|
+
const helpers = require("../../helpers");
|
|
6
7
|
|
|
7
8
|
module.exports = (config) => {
|
|
8
9
|
// eslint-disable-next-line no-undef
|
|
@@ -13,14 +14,14 @@ module.exports = (config) => {
|
|
|
13
14
|
protected: config.protected,
|
|
14
15
|
tags: ["youtube", "source", "staging", "daily"],
|
|
15
16
|
bigquery: {
|
|
16
|
-
partitionBy: "
|
|
17
|
+
partitionBy: "data_date",
|
|
17
18
|
clusterBy: ["video_id"],
|
|
18
19
|
},
|
|
19
20
|
assertions: {
|
|
20
21
|
// make sure rows have unique dimensions
|
|
21
22
|
uniqueKeys: [
|
|
22
23
|
[
|
|
23
|
-
"
|
|
24
|
+
"data_date",
|
|
24
25
|
"channel_id",
|
|
25
26
|
"video_id",
|
|
26
27
|
"live_or_on_demand",
|
|
@@ -30,43 +31,22 @@ module.exports = (config) => {
|
|
|
30
31
|
],
|
|
31
32
|
],
|
|
32
33
|
// make sure source partition and data dates match
|
|
33
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
34
35
|
},
|
|
35
36
|
columns: column_descriptions.column_descriptions,
|
|
36
37
|
description: "YT Channel Province Report Table - Staging",
|
|
37
38
|
})
|
|
38
|
-
.preOps(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
);
|
|
43
|
-
|
|
44
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
45
|
-
set source_date_checkpoint = (
|
|
46
|
-
${ctx.when(
|
|
47
|
-
ctx.incremental(),
|
|
48
|
-
`select
|
|
49
|
-
least(
|
|
50
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
51
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
52
|
-
)`,
|
|
53
|
-
`select date("${config.startDate}")`,
|
|
54
|
-
)}
|
|
55
|
-
);
|
|
56
|
-
|
|
57
|
-
${ctx.when(
|
|
58
|
-
ctx.incremental(),
|
|
59
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
60
|
-
)}
|
|
61
|
-
`,
|
|
62
|
-
)
|
|
39
|
+
.preOps((ctx) => {
|
|
40
|
+
// Get pre_operations to find updated source partitions
|
|
41
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_province")}`;
|
|
42
|
+
})
|
|
63
43
|
.query((ctx) =>
|
|
64
44
|
config.sources
|
|
65
45
|
.map((t) => {
|
|
66
46
|
return `
|
|
67
47
|
select
|
|
68
48
|
_PARTITIONDATE as source_partition_date,
|
|
69
|
-
parse_date('%Y%m%d',date) as interaction_date,
|
|
49
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
70
50
|
"${t.schema}" as site_nm,
|
|
71
51
|
current_timestamp() as updated_at,
|
|
72
52
|
channel_id,
|
|
@@ -95,7 +75,11 @@ select
|
|
|
95
75
|
red_views,
|
|
96
76
|
red_watch_time_minutes
|
|
97
77
|
from ${ctx.ref(t.database, t.schema, "p_channel_province_a2_" + t.suffix)}
|
|
98
|
-
|
|
78
|
+
${ctx.when(
|
|
79
|
+
ctx.incremental(),
|
|
80
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
81
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
82
|
+
)}
|
|
99
83
|
`;
|
|
100
84
|
})
|
|
101
85
|
.join(" union all "),
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_share_platform", {
|
|
@@ -11,14 +13,14 @@ module.exports = (config) => {
|
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["video_id", "sharing_service"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
// make sure rows have unique dimensions
|
|
19
21
|
uniqueKeys: [
|
|
20
22
|
[
|
|
21
|
-
"
|
|
23
|
+
"data_date",
|
|
22
24
|
"channel_id",
|
|
23
25
|
"video_id",
|
|
24
26
|
"live_or_on_demand",
|
|
@@ -28,42 +30,21 @@ module.exports = (config) => {
|
|
|
28
30
|
],
|
|
29
31
|
],
|
|
30
32
|
// make sure source partition and data dates match
|
|
31
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
33
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
32
34
|
},
|
|
33
35
|
description: "Staging table for YouTube Channel Share Platform data",
|
|
34
36
|
})
|
|
35
|
-
.preOps(
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
);
|
|
40
|
-
|
|
41
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
42
|
-
set source_date_checkpoint = (
|
|
43
|
-
${ctx.when(
|
|
44
|
-
ctx.incremental(),
|
|
45
|
-
`select
|
|
46
|
-
least(
|
|
47
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
48
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
49
|
-
)`,
|
|
50
|
-
`select date("${config.startDate}")`,
|
|
51
|
-
)}
|
|
52
|
-
);
|
|
53
|
-
|
|
54
|
-
${ctx.when(
|
|
55
|
-
ctx.incremental(),
|
|
56
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
57
|
-
)}
|
|
58
|
-
`,
|
|
59
|
-
)
|
|
37
|
+
.preOps((ctx) => {
|
|
38
|
+
// Get pre_operations to find updated source partitions
|
|
39
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_share_platform")}`;
|
|
40
|
+
})
|
|
60
41
|
.query((ctx) =>
|
|
61
42
|
config.sources
|
|
62
43
|
.map((t) => {
|
|
63
44
|
return `
|
|
64
45
|
select
|
|
65
46
|
_PARTITIONDATE as source_partition_date,
|
|
66
|
-
parse_date('%Y%m%d', date) as interaction_date,
|
|
47
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
67
48
|
"${t.schema}" as site_nm,
|
|
68
49
|
current_timestamp() as updated_at,
|
|
69
50
|
channel_id,
|
|
@@ -74,7 +55,11 @@ select
|
|
|
74
55
|
cast(sharing_service as int) as sharing_service,
|
|
75
56
|
shares
|
|
76
57
|
from ${ctx.ref(t.database, t.schema, "p_channel_sharing_service_a1_" + t.suffix)}
|
|
77
|
-
|
|
58
|
+
${ctx.when(
|
|
59
|
+
ctx.incremental(),
|
|
60
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
61
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
62
|
+
)}
|
|
78
63
|
`;
|
|
79
64
|
})
|
|
80
65
|
.join(" union all "),
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
const column_descriptions = require("../../column_descriptions");
|
|
6
|
+
const helpers = require("../../helpers");
|
|
6
7
|
|
|
7
8
|
module.exports = (config) => {
|
|
8
9
|
// eslint-disable-next-line no-undef
|
|
@@ -13,13 +14,13 @@ module.exports = (config) => {
|
|
|
13
14
|
protected: config.protected,
|
|
14
15
|
tags: ["youtube", "source", "staging", "daily"],
|
|
15
16
|
bigquery: {
|
|
16
|
-
partitionBy: "
|
|
17
|
+
partitionBy: "data_date",
|
|
17
18
|
clusterBy: ["video_id"],
|
|
18
19
|
},
|
|
19
20
|
assertions: {
|
|
20
21
|
uniqueKeys: [
|
|
21
22
|
[
|
|
22
|
-
"
|
|
23
|
+
"data_date",
|
|
23
24
|
"channel_id",
|
|
24
25
|
"video_id",
|
|
25
26
|
"live_or_on_demand",
|
|
@@ -29,42 +30,22 @@ module.exports = (config) => {
|
|
|
29
30
|
"subtitle_language_autotranslated",
|
|
30
31
|
],
|
|
31
32
|
],
|
|
32
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
33
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
33
34
|
},
|
|
34
35
|
columns: column_descriptions.column_descriptions,
|
|
35
36
|
description: "YT Channel Subtitles Report Table - Staging",
|
|
36
37
|
})
|
|
37
|
-
.preOps(
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
);
|
|
42
|
-
|
|
43
|
-
set source_date_checkpoint = (
|
|
44
|
-
${ctx.when(
|
|
45
|
-
ctx.incremental(),
|
|
46
|
-
`select
|
|
47
|
-
least(
|
|
48
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
49
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
50
|
-
)`,
|
|
51
|
-
`select date("${config.startDate}")`,
|
|
52
|
-
)}
|
|
53
|
-
);
|
|
54
|
-
|
|
55
|
-
${ctx.when(
|
|
56
|
-
ctx.incremental(),
|
|
57
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
58
|
-
)}
|
|
59
|
-
`,
|
|
60
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_subtitles")}`;
|
|
41
|
+
})
|
|
61
42
|
.query((ctx) =>
|
|
62
43
|
config.sources
|
|
63
44
|
.map((t) => {
|
|
64
45
|
return `
|
|
65
46
|
select
|
|
66
47
|
_PARTITIONDATE as source_partition_date,
|
|
67
|
-
parse_date('%Y%m%d',date) as interaction_date,
|
|
48
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
68
49
|
"${t.schema}" as site_nm,
|
|
69
50
|
current_timestamp() as updated_at,
|
|
70
51
|
channel_id,
|
|
@@ -81,7 +62,11 @@ select
|
|
|
81
62
|
red_views,
|
|
82
63
|
red_watch_time_minutes
|
|
83
64
|
from ${ctx.ref(t.database, t.schema, "p_channel_subtitles_a2_" + t.suffix)}
|
|
84
|
-
|
|
65
|
+
${ctx.when(
|
|
66
|
+
ctx.incremental(),
|
|
67
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
68
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
69
|
+
)}
|
|
85
70
|
`;
|
|
86
71
|
})
|
|
87
72
|
.join(" union all "),
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_traffic_source", {
|
|
@@ -11,14 +13,14 @@ module.exports = (config) => {
|
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["video_id", "traffic_source_type"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
// make sure rows have unique dimensions
|
|
19
21
|
uniqueKeys: [
|
|
20
22
|
[
|
|
21
|
-
"
|
|
23
|
+
"data_date",
|
|
22
24
|
"channel_id",
|
|
23
25
|
"video_id",
|
|
24
26
|
"live_or_on_demand",
|
|
@@ -29,60 +31,21 @@ module.exports = (config) => {
|
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
33
|
// make sure source partition and data dates match
|
|
32
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
33
|
-
},
|
|
34
|
-
description: "Staging table for YouTube Channel Traffic Source data",
|
|
35
|
-
assertions: {
|
|
36
|
-
// make sure rows have unique dimensions
|
|
37
|
-
uniqueKeys: [
|
|
38
|
-
[
|
|
39
|
-
"interaction_date",
|
|
40
|
-
"channel_id",
|
|
41
|
-
"video_id",
|
|
42
|
-
"live_or_on_demand",
|
|
43
|
-
"subscribed_status",
|
|
44
|
-
"country_code",
|
|
45
|
-
"traffic_source_type",
|
|
46
|
-
"traffic_source_detail",
|
|
47
|
-
],
|
|
48
|
-
],
|
|
49
|
-
// make sure source partition and data dates match
|
|
50
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
51
35
|
},
|
|
52
36
|
description: "Staging table for YouTube Channel Traffic Source data",
|
|
53
37
|
})
|
|
54
|
-
.preOps(
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
);
|
|
59
|
-
|
|
60
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
61
|
-
set source_date_checkpoint = (
|
|
62
|
-
${ctx.when(
|
|
63
|
-
ctx.incremental(),
|
|
64
|
-
`select
|
|
65
|
-
least(
|
|
66
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
67
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
68
|
-
)`,
|
|
69
|
-
`select date("${config.startDate}")`,
|
|
70
|
-
)}
|
|
71
|
-
);
|
|
72
|
-
|
|
73
|
-
${ctx.when(
|
|
74
|
-
ctx.incremental(),
|
|
75
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
76
|
-
)}
|
|
77
|
-
`,
|
|
78
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_traffic_source")}`;
|
|
41
|
+
})
|
|
79
42
|
.query((ctx) =>
|
|
80
43
|
config.sources
|
|
81
44
|
.map((t) => {
|
|
82
45
|
return `
|
|
83
46
|
select
|
|
84
47
|
_PARTITIONDATE as source_partition_date,
|
|
85
|
-
parse_date('%Y%m%d', date) as interaction_date,
|
|
48
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
86
49
|
"${t.schema}" as site_nm,
|
|
87
50
|
current_timestamp() as updated_at,
|
|
88
51
|
channel_id,
|
|
@@ -99,7 +62,11 @@ select
|
|
|
99
62
|
red_views,
|
|
100
63
|
red_watch_time_minutes
|
|
101
64
|
from ${ctx.ref(t.database, t.schema, "p_channel_traffic_source_a2_" + t.suffix)}
|
|
102
|
-
|
|
65
|
+
${ctx.when(
|
|
66
|
+
ctx.incremental(),
|
|
67
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
68
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
69
|
+
)}
|
|
103
70
|
`;
|
|
104
71
|
})
|
|
105
72
|
.join(" union all "),
|
|
@@ -3,45 +3,25 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
const column_descriptions = require("../column_descriptions");
|
|
6
|
+
const helpers = require("../helpers");
|
|
6
7
|
|
|
7
8
|
module.exports = (config) => {
|
|
8
9
|
// eslint-disable-next-line no-undef
|
|
9
10
|
return publish("ytc_annotation", {
|
|
10
11
|
type: "incremental",
|
|
11
|
-
schema: config.
|
|
12
|
+
schema: config.outputSchema,
|
|
12
13
|
tags: ["youtube", "output", "daily"],
|
|
13
14
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
15
|
+
partitionBy: "data_date",
|
|
15
16
|
clusterBy: ["video_id", "annotation_type"],
|
|
16
17
|
},
|
|
17
18
|
columns: column_descriptions.column_descriptions,
|
|
18
19
|
description: "YT Channel Annotation Report Table - Intermediate",
|
|
19
20
|
})
|
|
20
|
-
.preOps(
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
);
|
|
25
|
-
|
|
26
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
27
|
-
set interaction_date_checkpoint = (
|
|
28
|
-
${ctx.when(
|
|
29
|
-
ctx.incremental(),
|
|
30
|
-
`select
|
|
31
|
-
least(
|
|
32
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
33
|
-
(select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
34
|
-
)`,
|
|
35
|
-
`select date("${config.startDate}")`,
|
|
36
|
-
)}
|
|
37
|
-
);
|
|
38
|
-
|
|
39
|
-
${ctx.when(
|
|
40
|
-
ctx.incremental(),
|
|
41
|
-
`delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
|
|
42
|
-
)}
|
|
43
|
-
`,
|
|
44
|
-
)
|
|
21
|
+
.preOps((ctx) => {
|
|
22
|
+
// Get pre_operations to find updated output partitions
|
|
23
|
+
return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_annotation"))}`;
|
|
24
|
+
})
|
|
45
25
|
.query(
|
|
46
26
|
(ctx) => `
|
|
47
27
|
|
|
@@ -58,8 +38,13 @@ left join ${ctx.ref(
|
|
|
58
38
|
config.titlesTable,
|
|
59
39
|
)} as titles
|
|
60
40
|
using (video_id)
|
|
61
|
-
|
|
62
|
-
|
|
41
|
+
${ctx.when(
|
|
42
|
+
ctx.incremental(),
|
|
43
|
+
`where
|
|
44
|
+
data_date in unnest(partitions_to_update)`,
|
|
45
|
+
`where
|
|
46
|
+
data_date > date '${config.startDate}'`,
|
|
47
|
+
)}
|
|
63
48
|
`,
|
|
64
49
|
);
|
|
65
50
|
};
|
|
@@ -3,47 +3,25 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
const column_descriptions = require("../column_descriptions");
|
|
6
|
+
const helpers = require("../helpers");
|
|
6
7
|
|
|
7
8
|
module.exports = (config) => {
|
|
8
9
|
// eslint-disable-next-line no-undef
|
|
9
10
|
return publish("ytc_basic", {
|
|
10
11
|
type: "incremental",
|
|
11
|
-
schema: config.
|
|
12
|
+
schema: config.outputSchema,
|
|
12
13
|
tags: ["youtube", "output", "daily"],
|
|
13
14
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
15
|
+
partitionBy: "data_date",
|
|
15
16
|
clusterBy: ["video_id"],
|
|
16
17
|
},
|
|
17
18
|
columns: column_descriptions.column_descriptions,
|
|
18
19
|
description: "YT Channel Basic Report Table - Intermediate",
|
|
19
20
|
})
|
|
20
|
-
.preOps(
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
declare interaction_date_checkpoint default (
|
|
25
|
-
select date("${config.startDate}")
|
|
26
|
-
);
|
|
27
|
-
|
|
28
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
29
|
-
set interaction_date_checkpoint = (
|
|
30
|
-
${ctx.when(
|
|
31
|
-
ctx.incremental(),
|
|
32
|
-
`select
|
|
33
|
-
least(
|
|
34
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
35
|
-
(select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
36
|
-
)`,
|
|
37
|
-
`select date("${config.startDate}")`,
|
|
38
|
-
)}
|
|
39
|
-
);
|
|
40
|
-
|
|
41
|
-
${ctx.when(
|
|
42
|
-
ctx.incremental(),
|
|
43
|
-
`delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
|
|
44
|
-
)}
|
|
45
|
-
`,
|
|
46
|
-
)
|
|
21
|
+
.preOps((ctx) => {
|
|
22
|
+
// Get pre_operations to find updated output partitions
|
|
23
|
+
return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_basic"))}`;
|
|
24
|
+
})
|
|
47
25
|
.query(
|
|
48
26
|
(ctx) => `
|
|
49
27
|
|
|
@@ -54,7 +32,13 @@ with int_ex_titles as (
|
|
|
54
32
|
views * safe_divide(average_view_duration_seconds, average_view_duration_percentage) as row_max_duration_seconds,
|
|
55
33
|
subscribers_gained - subscribers_lost as subscribers_net,
|
|
56
34
|
from ${ctx.ref("stg_ytc_basic")}
|
|
57
|
-
|
|
35
|
+
${ctx.when(
|
|
36
|
+
ctx.incremental(),
|
|
37
|
+
`where
|
|
38
|
+
data_date in unnest(partitions_to_update)`,
|
|
39
|
+
`where
|
|
40
|
+
data_date > date '${config.startDate}'`,
|
|
41
|
+
)}
|
|
58
42
|
)
|
|
59
43
|
|
|
60
44
|
select
|