@ken-e/dataform-youtube 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/includes/column_descriptions.js +137 -95
- package/includes/definitions/sources/stg_ytc_annotation.js +15 -30
- package/includes/definitions/sources/stg_ytc_basic.js +14 -30
- package/includes/definitions/sources/stg_ytc_cards.js +15 -30
- package/includes/definitions/sources/stg_ytc_combined.js +14 -30
- package/includes/definitions/sources/stg_ytc_demographics.js +15 -30
- package/includes/definitions/sources/stg_ytc_device_os.js +15 -30
- package/includes/definitions/sources/stg_ytc_end_screens.js +13 -28
- package/includes/definitions/sources/stg_ytc_list_basic.js +14 -30
- package/includes/definitions/sources/stg_ytc_list_combined.js +15 -29
- package/includes/definitions/sources/stg_ytc_list_device_os.js +15 -29
- package/includes/definitions/sources/stg_ytc_list_playback.js +15 -29
- package/includes/definitions/sources/stg_ytc_list_province.js +14 -30
- package/includes/definitions/sources/stg_ytc_list_traffic_source.js +15 -29
- package/includes/definitions/sources/stg_ytc_lu_operating_systems.js +1 -1
- package/includes/definitions/sources/stg_ytc_playback.js +15 -30
- package/includes/definitions/sources/stg_ytc_province.js +14 -30
- package/includes/definitions/sources/stg_ytc_share_platform.js +15 -30
- package/includes/definitions/sources/stg_ytc_subtitles.js +14 -29
- package/includes/definitions/sources/stg_ytc_traffic_source.js +15 -48
- package/includes/definitions/ytc_annotation.js +14 -29
- package/includes/definitions/ytc_basic.js +14 -30
- package/includes/definitions/ytc_cards.js +14 -29
- package/includes/definitions/ytc_combined.js +14 -31
- package/includes/definitions/ytc_demographics.js +14 -28
- package/includes/definitions/ytc_demographics_views.js +15 -29
- package/includes/definitions/ytc_device_os.js +14 -28
- package/includes/definitions/ytc_end_screens.js +14 -28
- package/includes/definitions/ytc_list_basic.js +14 -28
- package/includes/definitions/ytc_list_combined.js +14 -27
- package/includes/definitions/ytc_list_device_os.js +14 -27
- package/includes/definitions/ytc_list_playback.js +14 -27
- package/includes/definitions/ytc_list_province.js +14 -28
- package/includes/definitions/ytc_list_traffic_source.js +14 -27
- package/includes/definitions/ytc_playback.js +14 -28
- package/includes/definitions/ytc_province.js +14 -28
- package/includes/definitions/ytc_share_platform.js +14 -28
- package/includes/definitions/ytc_subtitles.js +14 -27
- package/includes/definitions/ytc_traffic_source.js +14 -28
- package/includes/helpers.js +12 -0
- package/index.js +12 -2
- package/package.json +3 -3
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_demographics", {
|
|
@@ -11,14 +13,14 @@ module.exports = (config) => {
|
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["video_id", "age_group", "gender"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
// make sure rows have unique dimensions
|
|
19
21
|
uniqueKeys: [
|
|
20
22
|
[
|
|
21
|
-
"
|
|
23
|
+
"data_date",
|
|
22
24
|
"channel_id",
|
|
23
25
|
"video_id",
|
|
24
26
|
"live_or_on_demand",
|
|
@@ -29,42 +31,21 @@ module.exports = (config) => {
|
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
33
|
// make sure source partition and data dates match
|
|
32
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
33
35
|
},
|
|
34
36
|
description: "Staging table for YouTube Channel Demographics data",
|
|
35
37
|
})
|
|
36
|
-
.preOps(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
43
|
-
set source_date_checkpoint = (
|
|
44
|
-
${ctx.when(
|
|
45
|
-
ctx.incremental(),
|
|
46
|
-
`select
|
|
47
|
-
least(
|
|
48
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
49
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
50
|
-
)`,
|
|
51
|
-
`select date("${config.startDate}")`,
|
|
52
|
-
)}
|
|
53
|
-
);
|
|
54
|
-
|
|
55
|
-
${ctx.when(
|
|
56
|
-
ctx.incremental(),
|
|
57
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
58
|
-
)}
|
|
59
|
-
`,
|
|
60
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_demographics")}`;
|
|
41
|
+
})
|
|
61
42
|
.query((ctx) =>
|
|
62
43
|
config.sources
|
|
63
44
|
.map((t) => {
|
|
64
45
|
return `
|
|
65
46
|
select
|
|
66
47
|
_PARTITIONDATE as source_partition_date,
|
|
67
|
-
parse_date('%Y%m%d', date) as
|
|
48
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
68
49
|
"${t.schema}" as site_nm,
|
|
69
50
|
current_timestamp() as updated_at,
|
|
70
51
|
channel_id,
|
|
@@ -76,7 +57,11 @@ select
|
|
|
76
57
|
gender,
|
|
77
58
|
views_percentage
|
|
78
59
|
from ${ctx.ref(t.database, t.schema, "p_channel_demographics_a1_" + t.suffix)}
|
|
79
|
-
|
|
60
|
+
${ctx.when(
|
|
61
|
+
ctx.incremental(),
|
|
62
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
63
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
64
|
+
)}
|
|
80
65
|
`;
|
|
81
66
|
})
|
|
82
67
|
.join(" union all "),
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_device_os", {
|
|
@@ -11,14 +13,14 @@ module.exports = (config) => {
|
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["video_id", "device_type", "operating_system"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
// make sure rows have unique dimensions
|
|
19
21
|
uniqueKeys: [
|
|
20
22
|
[
|
|
21
|
-
"
|
|
23
|
+
"data_date",
|
|
22
24
|
"channel_id",
|
|
23
25
|
"video_id",
|
|
24
26
|
"live_or_on_demand",
|
|
@@ -29,35 +31,14 @@ module.exports = (config) => {
|
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
33
|
// make sure source partition and data dates match
|
|
32
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
33
35
|
},
|
|
34
36
|
description: "Staging table for YouTube Channel Device and OS data",
|
|
35
37
|
})
|
|
36
|
-
.preOps(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
43
|
-
set source_date_checkpoint = (
|
|
44
|
-
${ctx.when(
|
|
45
|
-
ctx.incremental(),
|
|
46
|
-
`select
|
|
47
|
-
least(
|
|
48
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
49
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
50
|
-
)`,
|
|
51
|
-
`select date("${config.startDate}")`,
|
|
52
|
-
)}
|
|
53
|
-
);
|
|
54
|
-
|
|
55
|
-
${ctx.when(
|
|
56
|
-
ctx.incremental(),
|
|
57
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
58
|
-
)}
|
|
59
|
-
`,
|
|
60
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_device_os")}`;
|
|
41
|
+
})
|
|
61
42
|
.query((ctx) =>
|
|
62
43
|
config.sources
|
|
63
44
|
.map((t) => {
|
|
@@ -65,7 +46,7 @@ declare source_date_checkpoint default (
|
|
|
65
46
|
|
|
66
47
|
select
|
|
67
48
|
_PARTITIONDATE as source_partition_date,
|
|
68
|
-
parse_date('%Y%m%d', date) as
|
|
49
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
69
50
|
"${t.schema}" as site_nm,
|
|
70
51
|
current_timestamp() as updated_at,
|
|
71
52
|
channel_id,
|
|
@@ -82,7 +63,11 @@ select
|
|
|
82
63
|
red_views,
|
|
83
64
|
red_watch_time_minutes
|
|
84
65
|
from ${ctx.ref(t.database, t.schema, "p_channel_device_os_a2_" + t.suffix)}
|
|
85
|
-
|
|
66
|
+
${ctx.when(
|
|
67
|
+
ctx.incremental(),
|
|
68
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
69
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
70
|
+
)}
|
|
86
71
|
`;
|
|
87
72
|
})
|
|
88
73
|
.join(" union all "),
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_end_screens", {
|
|
@@ -11,43 +13,22 @@ module.exports = (config) => {
|
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["video_id", "end_screen_element_type"],
|
|
16
18
|
},
|
|
17
19
|
description: "Staging table for YouTube Channel End Screen data",
|
|
18
20
|
})
|
|
19
|
-
.preOps(
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
);
|
|
24
|
-
|
|
25
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
26
|
-
set source_date_checkpoint = (
|
|
27
|
-
${ctx.when(
|
|
28
|
-
ctx.incremental(),
|
|
29
|
-
`select
|
|
30
|
-
least(
|
|
31
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
32
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
33
|
-
)`,
|
|
34
|
-
`select date("${config.startDate}")`,
|
|
35
|
-
)}
|
|
36
|
-
);
|
|
37
|
-
|
|
38
|
-
${ctx.when(
|
|
39
|
-
ctx.incremental(),
|
|
40
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
41
|
-
)}
|
|
42
|
-
`,
|
|
43
|
-
)
|
|
21
|
+
.preOps((ctx) => {
|
|
22
|
+
// Get pre_operations to find updated source partitions
|
|
23
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_end_screens")}`;
|
|
24
|
+
})
|
|
44
25
|
.query((ctx) =>
|
|
45
26
|
config.sources
|
|
46
27
|
.map((t) => {
|
|
47
28
|
return `
|
|
48
29
|
select
|
|
49
30
|
_PARTITIONDATE as source_partition_date,
|
|
50
|
-
parse_date('%Y%m%d', date) as
|
|
31
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
51
32
|
"${t.schema}" as site_nm,
|
|
52
33
|
current_timestamp() as updated_at,
|
|
53
34
|
channel_id,
|
|
@@ -61,7 +42,11 @@ select
|
|
|
61
42
|
end_screen_element_impressions,
|
|
62
43
|
end_screen_element_clicks
|
|
63
44
|
from ${ctx.ref(t.database, t.schema, "p_channel_end_screens_a1_" + t.suffix)}
|
|
64
|
-
|
|
45
|
+
${ctx.when(
|
|
46
|
+
ctx.incremental(),
|
|
47
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
48
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
49
|
+
)}
|
|
65
50
|
`;
|
|
66
51
|
})
|
|
67
52
|
.join(" union all "),
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
const column_descriptions = require("../../column_descriptions");
|
|
6
|
+
const helpers = require("../../helpers");
|
|
6
7
|
|
|
7
8
|
module.exports = (config) => {
|
|
8
9
|
// eslint-disable-next-line no-undef
|
|
@@ -13,14 +14,14 @@ module.exports = (config) => {
|
|
|
13
14
|
protected: config.protected,
|
|
14
15
|
tags: ["youtube", "source", "staging", "daily"],
|
|
15
16
|
bigquery: {
|
|
16
|
-
partitionBy: "
|
|
17
|
+
partitionBy: "data_date",
|
|
17
18
|
clusterBy: ["playlist_id", "video_id"],
|
|
18
19
|
},
|
|
19
20
|
assertions: {
|
|
20
21
|
// make sure rows have unique dimensions
|
|
21
22
|
uniqueKeys: [
|
|
22
23
|
[
|
|
23
|
-
"
|
|
24
|
+
"data_date",
|
|
24
25
|
"channel_id",
|
|
25
26
|
"playlist_id",
|
|
26
27
|
"video_id",
|
|
@@ -30,36 +31,15 @@ module.exports = (config) => {
|
|
|
30
31
|
],
|
|
31
32
|
],
|
|
32
33
|
// make sure source partition and data dates match
|
|
33
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
34
35
|
},
|
|
35
36
|
columns: column_descriptions.column_descriptions,
|
|
36
37
|
description: "YT Channel Playlist Basic Report Table - Staging",
|
|
37
38
|
})
|
|
38
|
-
.preOps(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
);
|
|
43
|
-
|
|
44
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
45
|
-
set source_date_checkpoint = (
|
|
46
|
-
${ctx.when(
|
|
47
|
-
ctx.incremental(),
|
|
48
|
-
`select
|
|
49
|
-
least(
|
|
50
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
51
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
52
|
-
)`,
|
|
53
|
-
`select date("${config.startDate}")`,
|
|
54
|
-
)}
|
|
55
|
-
);
|
|
56
|
-
|
|
57
|
-
${ctx.when(
|
|
58
|
-
ctx.incremental(),
|
|
59
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
60
|
-
)}
|
|
61
|
-
`,
|
|
62
|
-
)
|
|
39
|
+
.preOps((ctx) => {
|
|
40
|
+
// Get pre_operations to find updated source partitions
|
|
41
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_list_basic")}`;
|
|
42
|
+
})
|
|
63
43
|
.query((ctx) =>
|
|
64
44
|
config.sources
|
|
65
45
|
.map((t) => {
|
|
@@ -67,7 +47,7 @@ declare source_date_checkpoint default (
|
|
|
67
47
|
|
|
68
48
|
select
|
|
69
49
|
_PARTITIONDATE as source_partition_date,
|
|
70
|
-
parse_date('%Y%m%d',date) as
|
|
50
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
71
51
|
"${t.schema}" as site_nm,
|
|
72
52
|
current_timestamp() as updated_at,
|
|
73
53
|
channel_id,
|
|
@@ -83,7 +63,11 @@ select
|
|
|
83
63
|
playlist_saves_added,
|
|
84
64
|
playlist_saves_removed
|
|
85
65
|
from ${ctx.ref(t.database, t.schema, "p_playlist_basic_a1_" + t.suffix)}
|
|
86
|
-
|
|
66
|
+
${ctx.when(
|
|
67
|
+
ctx.incremental(),
|
|
68
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
69
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
70
|
+
)}
|
|
87
71
|
`;
|
|
88
72
|
})
|
|
89
73
|
.join(" union all "),
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_list_combined", {
|
|
@@ -11,13 +13,13 @@ module.exports = (config) => {
|
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["playlist_id", "video_id", "device_type", "operating_system"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
uniqueKeys: [
|
|
19
21
|
[
|
|
20
|
-
"
|
|
22
|
+
"data_date",
|
|
21
23
|
"channel_id",
|
|
22
24
|
"playlist_id",
|
|
23
25
|
"video_id",
|
|
@@ -30,41 +32,21 @@ module.exports = (config) => {
|
|
|
30
32
|
"operating_system",
|
|
31
33
|
],
|
|
32
34
|
],
|
|
33
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
35
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
34
36
|
},
|
|
35
37
|
description: "Staging table for YouTube Channel Playlist Combined data",
|
|
36
38
|
})
|
|
37
|
-
.preOps(
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
);
|
|
42
|
-
|
|
43
|
-
set source_date_checkpoint = (
|
|
44
|
-
${ctx.when(
|
|
45
|
-
ctx.incremental(),
|
|
46
|
-
`select
|
|
47
|
-
least(
|
|
48
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
49
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
50
|
-
)`,
|
|
51
|
-
`select date("${config.startDate}")`,
|
|
52
|
-
)}
|
|
53
|
-
);
|
|
54
|
-
|
|
55
|
-
${ctx.when(
|
|
56
|
-
ctx.incremental(),
|
|
57
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
58
|
-
)}
|
|
59
|
-
`,
|
|
60
|
-
)
|
|
39
|
+
.preOps((ctx) => {
|
|
40
|
+
// Get pre_operations to find updated source partitions
|
|
41
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_list_combined")}`;
|
|
42
|
+
})
|
|
61
43
|
.query((ctx) =>
|
|
62
44
|
config.sources
|
|
63
45
|
.map((t) => {
|
|
64
46
|
return `
|
|
65
47
|
select
|
|
66
48
|
_PARTITIONDATE as source_partition_date,
|
|
67
|
-
parse_date('%Y%m%d', date) as
|
|
49
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
68
50
|
"${t.schema}" as site_nm,
|
|
69
51
|
current_timestamp() as updated_at,
|
|
70
52
|
channel_id,
|
|
@@ -84,7 +66,11 @@ select
|
|
|
84
66
|
playlist_saves_added,
|
|
85
67
|
playlist_saves_removed
|
|
86
68
|
from ${ctx.ref(t.database, t.schema, "p_playlist_combined_a1_" + t.suffix)}
|
|
87
|
-
|
|
69
|
+
${ctx.when(
|
|
70
|
+
ctx.incremental(),
|
|
71
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
72
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
73
|
+
)}
|
|
88
74
|
`;
|
|
89
75
|
})
|
|
90
76
|
.join(" union all "),
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_list_device_os", {
|
|
@@ -11,13 +13,13 @@ module.exports = (config) => {
|
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["playlist_id", "video_id", "device_type", "operating_system"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
uniqueKeys: [
|
|
19
21
|
[
|
|
20
|
-
"
|
|
22
|
+
"data_date",
|
|
21
23
|
"channel_id",
|
|
22
24
|
"playlist_id",
|
|
23
25
|
"video_id",
|
|
@@ -28,35 +30,15 @@ module.exports = (config) => {
|
|
|
28
30
|
"operating_system",
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
33
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
32
34
|
},
|
|
33
35
|
description:
|
|
34
36
|
"Staging table for YouTube Channel Playlist Device and OS data",
|
|
35
37
|
})
|
|
36
|
-
.preOps(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
set source_date_checkpoint = (
|
|
43
|
-
${ctx.when(
|
|
44
|
-
ctx.incremental(),
|
|
45
|
-
`select
|
|
46
|
-
least(
|
|
47
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
48
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
49
|
-
)`,
|
|
50
|
-
`select date("${config.startDate}")`,
|
|
51
|
-
)}
|
|
52
|
-
);
|
|
53
|
-
|
|
54
|
-
${ctx.when(
|
|
55
|
-
ctx.incremental(),
|
|
56
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
57
|
-
)}
|
|
58
|
-
`,
|
|
59
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_list_device_os")}`;
|
|
41
|
+
})
|
|
60
42
|
.query((ctx) =>
|
|
61
43
|
config.sources
|
|
62
44
|
.map((t) => {
|
|
@@ -64,7 +46,7 @@ declare source_date_checkpoint default (
|
|
|
64
46
|
|
|
65
47
|
select
|
|
66
48
|
_PARTITIONDATE as source_partition_date,
|
|
67
|
-
parse_date('%Y%m%d', date) as
|
|
49
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
68
50
|
"${t.schema}" as site_nm,
|
|
69
51
|
current_timestamp() as updated_at,
|
|
70
52
|
channel_id,
|
|
@@ -82,7 +64,11 @@ select
|
|
|
82
64
|
playlist_saves_added,
|
|
83
65
|
playlist_saves_removed
|
|
84
66
|
from ${ctx.ref(t.database, t.schema, "p_playlist_device_os_a1_" + t.suffix)}
|
|
85
|
-
|
|
67
|
+
${ctx.when(
|
|
68
|
+
ctx.incremental(),
|
|
69
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
70
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
71
|
+
)}
|
|
86
72
|
`;
|
|
87
73
|
})
|
|
88
74
|
.join(" union all "),
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_list_playback", {
|
|
@@ -11,13 +13,13 @@ module.exports = (config) => {
|
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["playlist_id", "video_id", "playback_location_type"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
uniqueKeys: [
|
|
19
21
|
[
|
|
20
|
-
"
|
|
22
|
+
"data_date",
|
|
21
23
|
"channel_id",
|
|
22
24
|
"playlist_id",
|
|
23
25
|
"video_id",
|
|
@@ -28,42 +30,22 @@ module.exports = (config) => {
|
|
|
28
30
|
"playback_location_detail",
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
33
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
32
34
|
},
|
|
33
35
|
description:
|
|
34
36
|
"Staging table for YouTube Channel Playlist Playback Location data",
|
|
35
37
|
})
|
|
36
|
-
.preOps(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
set source_date_checkpoint = (
|
|
43
|
-
${ctx.when(
|
|
44
|
-
ctx.incremental(),
|
|
45
|
-
`select
|
|
46
|
-
least(
|
|
47
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
48
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
49
|
-
)`,
|
|
50
|
-
`select date("${config.startDate}")`,
|
|
51
|
-
)}
|
|
52
|
-
);
|
|
53
|
-
|
|
54
|
-
${ctx.when(
|
|
55
|
-
ctx.incremental(),
|
|
56
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
57
|
-
)}
|
|
58
|
-
`,
|
|
59
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_list_playback")}`;
|
|
41
|
+
})
|
|
60
42
|
.query((ctx) =>
|
|
61
43
|
config.sources
|
|
62
44
|
.map((t) => {
|
|
63
45
|
return `
|
|
64
46
|
select
|
|
65
47
|
_PARTITIONDATE as source_partition_date,
|
|
66
|
-
parse_date('%Y%m%d', date) as
|
|
48
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
67
49
|
"${t.schema}" as site_nm,
|
|
68
50
|
current_timestamp() as updated_at,
|
|
69
51
|
channel_id,
|
|
@@ -81,7 +63,11 @@ select
|
|
|
81
63
|
playlist_saves_added,
|
|
82
64
|
playlist_saves_removed
|
|
83
65
|
from ${ctx.ref(t.database, t.schema, "p_playlist_playback_location_a1_" + t.suffix)}
|
|
84
|
-
|
|
66
|
+
${ctx.when(
|
|
67
|
+
ctx.incremental(),
|
|
68
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
69
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
70
|
+
)}
|
|
85
71
|
`;
|
|
86
72
|
})
|
|
87
73
|
.join(" union all "),
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
const column_descriptions = require("../../column_descriptions");
|
|
6
|
+
const helpers = require("../../helpers");
|
|
6
7
|
|
|
7
8
|
module.exports = (config) => {
|
|
8
9
|
// eslint-disable-next-line no-undef
|
|
@@ -13,14 +14,14 @@ module.exports = (config) => {
|
|
|
13
14
|
protected: config.protected,
|
|
14
15
|
tags: ["youtube", "source", "staging", "daily"],
|
|
15
16
|
bigquery: {
|
|
16
|
-
partitionBy: "
|
|
17
|
+
partitionBy: "data_date",
|
|
17
18
|
clusterBy: ["playlist_id", "video_id"],
|
|
18
19
|
},
|
|
19
20
|
assertions: {
|
|
20
21
|
// make sure rows have unique dimensions
|
|
21
22
|
uniqueKeys: [
|
|
22
23
|
[
|
|
23
|
-
"
|
|
24
|
+
"data_date",
|
|
24
25
|
"channel_id",
|
|
25
26
|
"playlist_id",
|
|
26
27
|
"video_id",
|
|
@@ -30,43 +31,22 @@ module.exports = (config) => {
|
|
|
30
31
|
],
|
|
31
32
|
],
|
|
32
33
|
// make sure source partition and data dates match
|
|
33
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
34
35
|
},
|
|
35
36
|
columns: column_descriptions.column_descriptions,
|
|
36
37
|
description: "YT Channel Playlist Province Report Table - Staging",
|
|
37
38
|
})
|
|
38
|
-
.preOps(
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
);
|
|
43
|
-
|
|
44
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
45
|
-
set source_date_checkpoint = (
|
|
46
|
-
${ctx.when(
|
|
47
|
-
ctx.incremental(),
|
|
48
|
-
`select
|
|
49
|
-
least(
|
|
50
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
51
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
52
|
-
)`,
|
|
53
|
-
`select date("${config.startDate}")`,
|
|
54
|
-
)}
|
|
55
|
-
);
|
|
56
|
-
|
|
57
|
-
${ctx.when(
|
|
58
|
-
ctx.incremental(),
|
|
59
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
60
|
-
)}
|
|
61
|
-
`,
|
|
62
|
-
)
|
|
39
|
+
.preOps((ctx) => {
|
|
40
|
+
// Get pre_operations to find updated source partitions
|
|
41
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_list_province")}`;
|
|
42
|
+
})
|
|
63
43
|
.query((ctx) =>
|
|
64
44
|
config.sources
|
|
65
45
|
.map((t) => {
|
|
66
46
|
return `
|
|
67
47
|
select
|
|
68
48
|
_PARTITIONDATE as source_partition_date,
|
|
69
|
-
parse_date('%Y%m%d',date) as
|
|
49
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
70
50
|
"${t.schema}" as site_nm,
|
|
71
51
|
current_timestamp() as updated_at,
|
|
72
52
|
channel_id,
|
|
@@ -83,7 +63,11 @@ select
|
|
|
83
63
|
playlist_saves_added,
|
|
84
64
|
playlist_saves_removed
|
|
85
65
|
from ${ctx.ref(t.database, t.schema, "p_playlist_province_a1_" + t.suffix)}
|
|
86
|
-
|
|
66
|
+
${ctx.when(
|
|
67
|
+
ctx.incremental(),
|
|
68
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
69
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
70
|
+
)}
|
|
87
71
|
`;
|
|
88
72
|
})
|
|
89
73
|
.join(" union all "),
|