@ken-e/dataform-youtube 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/includes/column_descriptions.js +137 -95
- package/includes/definitions/sources/stg_ytc_annotation.js +16 -31
- package/includes/definitions/sources/stg_ytc_basic.js +15 -31
- package/includes/definitions/sources/stg_ytc_cards.js +16 -31
- package/includes/definitions/sources/stg_ytc_combined.js +15 -31
- package/includes/definitions/sources/stg_ytc_demographics.js +16 -31
- package/includes/definitions/sources/stg_ytc_device_os.js +16 -31
- package/includes/definitions/sources/stg_ytc_end_screens.js +14 -29
- package/includes/definitions/sources/stg_ytc_list_basic.js +15 -31
- package/includes/definitions/sources/stg_ytc_list_combined.js +16 -30
- package/includes/definitions/sources/stg_ytc_list_device_os.js +16 -30
- package/includes/definitions/sources/stg_ytc_list_playback.js +16 -30
- package/includes/definitions/sources/stg_ytc_list_province.js +15 -31
- package/includes/definitions/sources/stg_ytc_list_traffic_source.js +16 -30
- package/includes/definitions/sources/stg_ytc_lu_operating_systems.js +1 -1
- package/includes/definitions/sources/stg_ytc_playback.js +16 -31
- package/includes/definitions/sources/stg_ytc_province.js +15 -31
- package/includes/definitions/sources/stg_ytc_share_platform.js +16 -31
- package/includes/definitions/sources/stg_ytc_subtitles.js +15 -30
- package/includes/definitions/sources/stg_ytc_traffic_source.js +16 -49
- package/includes/definitions/ytc_annotation.js +14 -29
- package/includes/definitions/ytc_basic.js +14 -30
- package/includes/definitions/ytc_cards.js +14 -29
- package/includes/definitions/ytc_combined.js +14 -31
- package/includes/definitions/ytc_demographics.js +14 -28
- package/includes/definitions/ytc_demographics_views.js +15 -29
- package/includes/definitions/ytc_device_os.js +14 -28
- package/includes/definitions/ytc_end_screens.js +14 -28
- package/includes/definitions/ytc_list_basic.js +14 -28
- package/includes/definitions/ytc_list_combined.js +14 -27
- package/includes/definitions/ytc_list_device_os.js +14 -27
- package/includes/definitions/ytc_list_playback.js +14 -27
- package/includes/definitions/ytc_list_province.js +14 -28
- package/includes/definitions/ytc_list_traffic_source.js +14 -27
- package/includes/definitions/ytc_playback.js +14 -28
- package/includes/definitions/ytc_province.js +14 -28
- package/includes/definitions/ytc_share_platform.js +14 -28
- package/includes/definitions/ytc_subtitles.js +14 -27
- package/includes/definitions/ytc_traffic_source.js +14 -28
- package/includes/helpers.js +12 -0
- package/index.js +12 -2
- package/package.json +3 -3
|
@@ -1,95 +1,137 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Column Descriptions
|
|
3
|
-
* Sets all variables that should be customized to the project.
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
const column_descriptions = {
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Column Descriptions
|
|
3
|
+
* Sets all variables that should be customized to the project.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
const column_descriptions = {
|
|
7
|
+
ad_impressions: "The number of verified ad impressions served.",
|
|
8
|
+
ad_type:
|
|
9
|
+
"The type of ad displayed (e.g., auction_display, auction_instream, reserved_instream).",
|
|
10
|
+
age_group:
|
|
11
|
+
"Age group of the logged-in viewers (e.g., 13-17, 18-24, 25-34, etc.).",
|
|
12
|
+
annotation_click_through_rate:
|
|
13
|
+
"The ratio of clicked annotations to total clickable annotation impressions (CTR).",
|
|
14
|
+
annotation_clickable_impressions:
|
|
15
|
+
"The number of annotation impressions that could be clicked.",
|
|
16
|
+
annotation_clicks: "The total number of clicks on annotations.",
|
|
17
|
+
annotation_closable_impressions:
|
|
18
|
+
"The total number of closable annotation impressions displayed.",
|
|
19
|
+
annotation_close_rate:
|
|
20
|
+
"The ratio of closed annotations to total annotation impressions.",
|
|
21
|
+
annotation_closes: "The number of times annotations were closed by users.",
|
|
22
|
+
annotation_id: "Unique identifier for the annotation.",
|
|
23
|
+
annotation_impressions:
|
|
24
|
+
"The total number of annotation impressions displayed.",
|
|
25
|
+
annotation_type: "Integer index indicating the type of annotation.",
|
|
26
|
+
annotation_type_name: "String name of the annotation type.",
|
|
27
|
+
average_view_duration:
|
|
28
|
+
"The average duration (in seconds) of video playbacks.",
|
|
29
|
+
average_view_duration_percentage:
|
|
30
|
+
"The average percentage of a video watched during a playback.",
|
|
31
|
+
average_view_duration_seconds:
|
|
32
|
+
"The average duration in seconds that videos were watched.",
|
|
33
|
+
card_click_rate: "The ratio of card clicks to card impressions (CTR).",
|
|
34
|
+
card_clicks: "The number of times cards were clicked.",
|
|
35
|
+
card_id: "Unique identifier for the card.",
|
|
36
|
+
card_impressions: "The number of times cards were displayed (impressions).",
|
|
37
|
+
card_teaser_click_rate:
|
|
38
|
+
"The ratio of card teaser clicks to card teaser impressions (CTR).",
|
|
39
|
+
card_teaser_clicks: "The number of times card teasers were clicked.",
|
|
40
|
+
card_teaser_impressions:
|
|
41
|
+
"The number of times card teasers were displayed (impressions).",
|
|
42
|
+
card_type: "Integer index indicating the type of card.",
|
|
43
|
+
card_type_name: "String name of the card type.",
|
|
44
|
+
channel_id: "The ID of the YouTube channel associated with the metrics.",
|
|
45
|
+
claimed_status:
|
|
46
|
+
"Indicates whether the video content was claimed ('claimed' or 'unclaimed').",
|
|
47
|
+
comments: "The number of times users commented on videos.",
|
|
48
|
+
country_code:
|
|
49
|
+
"The ISO 3166-1 alpha-2 country code where the interactions occurred.",
|
|
50
|
+
cpm: "Estimated gross revenue per 1000 ad impressions (CPM, USD).",
|
|
51
|
+
device_name: "String name of the device type.",
|
|
52
|
+
device_type:
|
|
53
|
+
"The type of device used by the viewer (e.g., DESKTOP, MOBILE, TABLET, TV, GAME_CONSOLE).",
|
|
54
|
+
dislikes: "The number of times users disliked videos (negative ratings).",
|
|
55
|
+
end_screen_element_id: "Unique identifier for the end screen element.",
|
|
56
|
+
end_screen_element_type:
|
|
57
|
+
"Integer index indicating the type of end screen element.",
|
|
58
|
+
end_screen_element_type_name: "String name of the end screen element type.",
|
|
59
|
+
estimated_ad_revenue:
|
|
60
|
+
"Estimated net revenue (USD) specifically from Google-sold advertising sources (subject to month-end adjustment).",
|
|
61
|
+
estimated_minutes_watched:
|
|
62
|
+
"The estimated total number of minutes users watched videos.",
|
|
63
|
+
estimated_monetized_playbacks:
|
|
64
|
+
"The number of playbacks where at least one ad impression was shown (includes pre-rolls even if video wasn't watched).",
|
|
65
|
+
estimated_red_minutes_watched:
|
|
66
|
+
"The estimated number of minutes YouTube Premium members watched videos.",
|
|
67
|
+
estimated_revenue:
|
|
68
|
+
"Total estimated net revenue (USD) from Google-sold ads and non-ad sources (subject to month-end adjustment).",
|
|
69
|
+
gender: "Gender of the logged-in viewers.",
|
|
70
|
+
gross_revenue:
|
|
71
|
+
"Estimated gross revenue (USD) from Google-sold or DoubleClick-partner-sold ads (before revenue shares/ownership splits, subject to month-end adjustment).",
|
|
72
|
+
interaction_date:
|
|
73
|
+
"Date of interaction in YYYY-MM-DD format, based on Pacific Time (PST/PDT).",
|
|
74
|
+
likes: "The number of times users liked videos (positive ratings).",
|
|
75
|
+
live_or_on_demand:
|
|
76
|
+
"Indicates if the interaction was during a live broadcast ('LIVE') or with on-demand content ('ON_DEMAND').",
|
|
77
|
+
monetized_playbacks: "DEPRECATED. Use estimated_monetized_playbacks instead.",
|
|
78
|
+
operating_system:
|
|
79
|
+
"The operating system used by the viewer (e.g., ANDROID, IOS, LINUX, MACINTOSH, WINDOWS).",
|
|
80
|
+
operating_system_name: "String name of the operating system.",
|
|
81
|
+
playback_based_cpm:
|
|
82
|
+
"Estimated gross revenue per 1000 monetized playbacks (Playback-based CPM, USD).",
|
|
83
|
+
playback_location: "Type of location where playback occurred (text).",
|
|
84
|
+
playback_location_type:
|
|
85
|
+
"Integer index indicating type of location where playback occurred.",
|
|
86
|
+
playback_location_name: "String name of the playback location.",
|
|
87
|
+
playlist_id: "Unique identifier for the YouTube playlist.",
|
|
88
|
+
playlist_saves_net:
|
|
89
|
+
"The number of times users saved videos to their playlists less the number of times they removed them.",
|
|
90
|
+
playlist_starts: "Times the playlist was started.",
|
|
91
|
+
playlist_title: "Title of the YouTube playlist.",
|
|
92
|
+
province_code:
|
|
93
|
+
"The ISO code for the province or state where the interactions occurred.",
|
|
94
|
+
red_views: "The number of times YouTube Premium members viewed videos.",
|
|
95
|
+
row_max_duration_seconds:
|
|
96
|
+
"Maximum duration in seconds for the row if all videos were played to the end. Provided to simplify downstream calculation of percentage of videos watched.",
|
|
97
|
+
shares: "The number of times users shared videos via the 'Share' button.",
|
|
98
|
+
sharing_service:
|
|
99
|
+
"The service used to share the video (e.g., FACEBOOK, WHATSAPP, REDDIT, EMAIL).",
|
|
100
|
+
sharing_service_name: "String name of the sharing service.",
|
|
101
|
+
subscribed_status:
|
|
102
|
+
"Indicates whether the viewer was subscribed to the channel ('SUBSCRIBED' or 'UNSUBSCRIBED').",
|
|
103
|
+
subscribers_gained: "The number of new subscribers gained.",
|
|
104
|
+
subscribers_lost: "The number of subscribers lost (unsubscribes).",
|
|
105
|
+
subscribers_net:
|
|
106
|
+
"Net change in subscribers (subscribers gained minus subscribers lost).",
|
|
107
|
+
subtitle_language: "The language code of the video subtitle.",
|
|
108
|
+
subtitle_language_autotranslated:
|
|
109
|
+
"Boolean indicating if the subtitle was automatically translated.",
|
|
110
|
+
traffic_source_detail:
|
|
111
|
+
"Specific details about the traffic source (e.g., search term, related video ID, external URL).",
|
|
112
|
+
traffic_source_name: "String name of the traffic source type.",
|
|
113
|
+
traffic_source_type:
|
|
114
|
+
"The type of referrer through which viewers reached the video (e.g., YT_SEARCH, RELATED_VIDEO, EXT_URL, SUBSCRIBER).",
|
|
115
|
+
uploader_type:
|
|
116
|
+
"Indicates if metrics relate to content uploaded by the owner ('SELF') or claimed third-party content ('THIRD_PARTY').",
|
|
117
|
+
video_id: "The ID of a YouTube video.",
|
|
118
|
+
video_title: "Title of the YouTube video.",
|
|
119
|
+
videos_added_to_playlists:
|
|
120
|
+
"The number of times videos were added to any YouTube playlist (owner's or others').",
|
|
121
|
+
videos_removed_from_playlists:
|
|
122
|
+
"The number of times videos were removed from any YouTube playlist (owner's or others').",
|
|
123
|
+
views:
|
|
124
|
+
"The total number of times videos were viewed. In playlist reports, counts views within the playlist context.",
|
|
125
|
+
views_percentage:
|
|
126
|
+
"Percentage of total views for the time period that were logged-in.",
|
|
127
|
+
views_with_demographics:
|
|
128
|
+
"Number of views from users with demographic information available.",
|
|
129
|
+
watch_time_minutes: "The number of minutes that users watched videos.",
|
|
130
|
+
youtube_product:
|
|
131
|
+
"The YouTube property where the interaction occurred (e.g., CORE, GAMING, KIDS, MUSIC).",
|
|
132
|
+
};
|
|
133
|
+
|
|
134
|
+
// Export project vars
|
|
135
|
+
module.exports = {
|
|
136
|
+
column_descriptions,
|
|
137
|
+
};
|
|
@@ -2,23 +2,25 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_annotation", {
|
|
8
10
|
type: "incremental",
|
|
9
11
|
database: config.target.database,
|
|
10
|
-
schema: config.datasetStaging,
|
|
12
|
+
schema: config.target.datasetStaging,
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["video_id", "annotation_type"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
// make sure rows have unique dimensions
|
|
19
21
|
uniqueKeys: [
|
|
20
22
|
[
|
|
21
|
-
"
|
|
23
|
+
"data_date",
|
|
22
24
|
"channel_id",
|
|
23
25
|
"video_id",
|
|
24
26
|
"live_or_on_demand",
|
|
@@ -29,42 +31,21 @@ module.exports = (config) => {
|
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
33
|
// make sure source partition and data dates match
|
|
32
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
33
35
|
},
|
|
34
36
|
description: "Staging table for YouTube Channel Annotation data",
|
|
35
37
|
})
|
|
36
|
-
.preOps(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
43
|
-
set source_date_checkpoint = (
|
|
44
|
-
${ctx.when(
|
|
45
|
-
ctx.incremental(),
|
|
46
|
-
`select
|
|
47
|
-
least(
|
|
48
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
49
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
50
|
-
)`,
|
|
51
|
-
`select date("${config.startDate}")`,
|
|
52
|
-
)}
|
|
53
|
-
);
|
|
54
|
-
|
|
55
|
-
${ctx.when(
|
|
56
|
-
ctx.incremental(),
|
|
57
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
58
|
-
)}
|
|
59
|
-
`,
|
|
60
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_annotation")}`;
|
|
41
|
+
})
|
|
61
42
|
.query((ctx) =>
|
|
62
43
|
config.sources
|
|
63
44
|
.map((t) => {
|
|
64
45
|
return `
|
|
65
46
|
select
|
|
66
47
|
_PARTITIONDATE as source_partition_date,
|
|
67
|
-
parse_date('%Y%m%d', date) as
|
|
48
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
68
49
|
"${t.schema}" as site_nm,
|
|
69
50
|
current_timestamp() as updated_at,
|
|
70
51
|
channel_id,
|
|
@@ -82,7 +63,11 @@ select
|
|
|
82
63
|
annotation_clicks,
|
|
83
64
|
annotation_closes
|
|
84
65
|
from ${ctx.ref(t.database, t.schema, "p_channel_annotations_a1_" + t.suffix)}
|
|
85
|
-
|
|
66
|
+
${ctx.when(
|
|
67
|
+
ctx.incremental(),
|
|
68
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
69
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
70
|
+
)}
|
|
86
71
|
|
|
87
72
|
`;
|
|
88
73
|
})
|
|
@@ -3,24 +3,25 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
const column_descriptions = require("../../column_descriptions");
|
|
6
|
+
const helpers = require("../../helpers");
|
|
6
7
|
|
|
7
8
|
module.exports = (config) => {
|
|
8
9
|
// eslint-disable-next-line no-undef
|
|
9
10
|
return publish("stg_ytc_basic", {
|
|
10
11
|
type: "incremental",
|
|
11
12
|
database: config.target.database,
|
|
12
|
-
schema: config.datasetStaging,
|
|
13
|
+
schema: config.target.datasetStaging,
|
|
13
14
|
protected: config.protected,
|
|
14
15
|
tags: ["youtube", "source", "staging", "daily"],
|
|
15
16
|
bigquery: {
|
|
16
|
-
partitionBy: "
|
|
17
|
+
partitionBy: "data_date",
|
|
17
18
|
clusterBy: ["video_id"],
|
|
18
19
|
},
|
|
19
20
|
assertions: {
|
|
20
21
|
// make sure rows have unique dimensions
|
|
21
22
|
uniqueKeys: [
|
|
22
23
|
[
|
|
23
|
-
"
|
|
24
|
+
"data_date",
|
|
24
25
|
"channel_id",
|
|
25
26
|
"video_id",
|
|
26
27
|
"live_or_on_demand",
|
|
@@ -29,43 +30,22 @@ module.exports = (config) => {
|
|
|
29
30
|
],
|
|
30
31
|
],
|
|
31
32
|
// make sure source partition and data dates match
|
|
32
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
33
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
33
34
|
},
|
|
34
35
|
columns: column_descriptions.column_descriptions,
|
|
35
36
|
description: "YT Channel Basic Report Table - Staging",
|
|
36
37
|
})
|
|
37
|
-
.preOps(
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
);
|
|
42
|
-
|
|
43
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
44
|
-
set source_date_checkpoint = (
|
|
45
|
-
${ctx.when(
|
|
46
|
-
ctx.incremental(),
|
|
47
|
-
`select
|
|
48
|
-
least(
|
|
49
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
50
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
51
|
-
)`,
|
|
52
|
-
`select date("${config.startDate}")`,
|
|
53
|
-
)}
|
|
54
|
-
);
|
|
55
|
-
|
|
56
|
-
${ctx.when(
|
|
57
|
-
ctx.incremental(),
|
|
58
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
59
|
-
)}
|
|
60
|
-
`,
|
|
61
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_basic")}`;
|
|
41
|
+
})
|
|
62
42
|
.query((ctx) =>
|
|
63
43
|
config.sources
|
|
64
44
|
.map((t) => {
|
|
65
45
|
return `
|
|
66
46
|
select
|
|
67
47
|
_PARTITIONDATE as source_partition_date,
|
|
68
|
-
parse_date('%Y%m%d',date) as
|
|
48
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
69
49
|
"${t.schema}" as site_nm,
|
|
70
50
|
current_timestamp() as updated_at,
|
|
71
51
|
channel_id,
|
|
@@ -101,7 +81,11 @@ select
|
|
|
101
81
|
red_views,
|
|
102
82
|
red_watch_time_minutes,
|
|
103
83
|
from ${ctx.ref(t.database, t.schema, "p_channel_basic_a2_" + t.suffix)}
|
|
104
|
-
|
|
84
|
+
${ctx.when(
|
|
85
|
+
ctx.incremental(),
|
|
86
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
87
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
88
|
+
)}
|
|
105
89
|
`;
|
|
106
90
|
})
|
|
107
91
|
.join(" union all "),
|
|
@@ -2,23 +2,25 @@
|
|
|
2
2
|
* Copyright (C) 2025 by KEN-E, LLC
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
const helpers = require("../../helpers");
|
|
6
|
+
|
|
5
7
|
module.exports = (config) => {
|
|
6
8
|
// eslint-disable-next-line no-undef
|
|
7
9
|
return publish("stg_ytc_cards", {
|
|
8
10
|
type: "incremental",
|
|
9
11
|
database: config.target.database,
|
|
10
|
-
schema: config.datasetStaging,
|
|
12
|
+
schema: config.target.datasetStaging,
|
|
11
13
|
protected: config.protected,
|
|
12
14
|
tags: ["youtube", "source", "staging", "daily"],
|
|
13
15
|
bigquery: {
|
|
14
|
-
partitionBy: "
|
|
16
|
+
partitionBy: "data_date",
|
|
15
17
|
clusterBy: ["video_id", "card_type"],
|
|
16
18
|
},
|
|
17
19
|
assertions: {
|
|
18
20
|
// make sure rows have unique dimensions
|
|
19
21
|
uniqueKeys: [
|
|
20
22
|
[
|
|
21
|
-
"
|
|
23
|
+
"data_date",
|
|
22
24
|
"channel_id",
|
|
23
25
|
"video_id",
|
|
24
26
|
"live_or_on_demand",
|
|
@@ -29,42 +31,21 @@ module.exports = (config) => {
|
|
|
29
31
|
],
|
|
30
32
|
],
|
|
31
33
|
// make sure source partition and data dates match
|
|
32
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
34
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
33
35
|
},
|
|
34
36
|
description: "Staging table for YouTube Channel Cards data",
|
|
35
37
|
})
|
|
36
|
-
.preOps(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
);
|
|
41
|
-
|
|
42
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
43
|
-
set source_date_checkpoint = (
|
|
44
|
-
${ctx.when(
|
|
45
|
-
ctx.incremental(),
|
|
46
|
-
`select
|
|
47
|
-
least(
|
|
48
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
49
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
50
|
-
)`,
|
|
51
|
-
`select date("${config.startDate}")`,
|
|
52
|
-
)}
|
|
53
|
-
);
|
|
54
|
-
|
|
55
|
-
${ctx.when(
|
|
56
|
-
ctx.incremental(),
|
|
57
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
58
|
-
)}
|
|
59
|
-
`,
|
|
60
|
-
)
|
|
38
|
+
.preOps((ctx) => {
|
|
39
|
+
// Get pre_operations to find updated source partitions
|
|
40
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_cards")}`;
|
|
41
|
+
})
|
|
61
42
|
.query((ctx) =>
|
|
62
43
|
config.sources
|
|
63
44
|
.map((t) => {
|
|
64
45
|
return `
|
|
65
46
|
select
|
|
66
47
|
_PARTITIONDATE as source_partition_date,
|
|
67
|
-
parse_date('%Y%m%d', date) as
|
|
48
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
68
49
|
"${t.schema}" as site_nm,
|
|
69
50
|
current_timestamp() as updated_at,
|
|
70
51
|
channel_id,
|
|
@@ -81,7 +62,11 @@ select
|
|
|
81
62
|
card_clicks,
|
|
82
63
|
card_teaser_clicks
|
|
83
64
|
from ${ctx.ref(t.database, t.schema, "p_channel_cards_a1_" + t.suffix)}
|
|
84
|
-
|
|
65
|
+
${ctx.when(
|
|
66
|
+
ctx.incremental(),
|
|
67
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
68
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
69
|
+
)}
|
|
85
70
|
`;
|
|
86
71
|
})
|
|
87
72
|
.join(" union all "),
|
|
@@ -3,24 +3,25 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
const column_descriptions = require("../../column_descriptions");
|
|
6
|
+
const helpers = require("../../helpers");
|
|
6
7
|
|
|
7
8
|
module.exports = (config) => {
|
|
8
9
|
// eslint-disable-next-line no-undef
|
|
9
10
|
return publish("stg_ytc_combined", {
|
|
10
11
|
type: "incremental",
|
|
11
12
|
database: config.target.database,
|
|
12
|
-
schema: config.datasetStaging,
|
|
13
|
+
schema: config.target.datasetStaging,
|
|
13
14
|
protected: config.protected,
|
|
14
15
|
tags: ["youtube", "source", "staging", "daily"],
|
|
15
16
|
bigquery: {
|
|
16
|
-
partitionBy: "
|
|
17
|
+
partitionBy: "data_date",
|
|
17
18
|
clusterBy: ["video_id"],
|
|
18
19
|
},
|
|
19
20
|
assertions: {
|
|
20
21
|
// make sure rows have unique dimensions
|
|
21
22
|
uniqueKeys: [
|
|
22
23
|
[
|
|
23
|
-
"
|
|
24
|
+
"data_date",
|
|
24
25
|
"channel_id",
|
|
25
26
|
"video_id",
|
|
26
27
|
"live_or_on_demand",
|
|
@@ -33,36 +34,15 @@ module.exports = (config) => {
|
|
|
33
34
|
],
|
|
34
35
|
],
|
|
35
36
|
// make sure source partition and data dates match
|
|
36
|
-
rowConditions: ["interaction_date = source_partition_date"],
|
|
37
|
+
// rowConditions: ["interaction_date = source_partition_date"],
|
|
37
38
|
},
|
|
38
39
|
columns: column_descriptions.column_descriptions,
|
|
39
40
|
description: "YT Channel Combined Report Table - Staging",
|
|
40
41
|
})
|
|
41
|
-
.preOps(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
);
|
|
46
|
-
|
|
47
|
-
--Set the incremental update checkpoint based on current max partition value minus lookback.
|
|
48
|
-
set source_date_checkpoint = (
|
|
49
|
-
${ctx.when(
|
|
50
|
-
ctx.incremental(),
|
|
51
|
-
`select
|
|
52
|
-
least(
|
|
53
|
-
(select date_sub(current_date(), interval ${config.daysBack} day)),
|
|
54
|
-
(select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
|
|
55
|
-
)`,
|
|
56
|
-
`select date("${config.startDate}")`,
|
|
57
|
-
)}
|
|
58
|
-
);
|
|
59
|
-
|
|
60
|
-
${ctx.when(
|
|
61
|
-
ctx.incremental(),
|
|
62
|
-
`delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
|
|
63
|
-
)}
|
|
64
|
-
`,
|
|
65
|
-
)
|
|
42
|
+
.preOps((ctx) => {
|
|
43
|
+
// Get pre_operations to find updated source partitions
|
|
44
|
+
return `${helpers.staging_preops(ctx, config, "stg_ytc_combined")}`;
|
|
45
|
+
})
|
|
66
46
|
.query((ctx) =>
|
|
67
47
|
config.sources
|
|
68
48
|
.map((t) => {
|
|
@@ -70,7 +50,7 @@ declare source_date_checkpoint default (
|
|
|
70
50
|
|
|
71
51
|
select
|
|
72
52
|
_PARTITIONDATE as source_partition_date,
|
|
73
|
-
parse_date('%Y%m%d',date) as
|
|
53
|
+
parse_date('%Y%m%d', date) as data_date,
|
|
74
54
|
"${t.schema}" as site_nm,
|
|
75
55
|
current_timestamp() as updated_at,
|
|
76
56
|
channel_id,
|
|
@@ -89,7 +69,11 @@ select
|
|
|
89
69
|
red_views,
|
|
90
70
|
red_watch_time_minutes,
|
|
91
71
|
from ${ctx.ref(t.database, t.schema, "p_channel_combined_a2_" + t.suffix)}
|
|
92
|
-
|
|
72
|
+
${ctx.when(
|
|
73
|
+
ctx.incremental(),
|
|
74
|
+
`where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
|
|
75
|
+
`where date(_PARTITIONTIME) > date(self_checkpoint)`,
|
|
76
|
+
)}
|
|
93
77
|
`;
|
|
94
78
|
})
|
|
95
79
|
.join(" union all "),
|