@ken-e/dataform-youtube 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/includes/column_descriptions.js +137 -95
  2. package/includes/definitions/sources/stg_ytc_annotation.js +16 -31
  3. package/includes/definitions/sources/stg_ytc_basic.js +15 -31
  4. package/includes/definitions/sources/stg_ytc_cards.js +16 -31
  5. package/includes/definitions/sources/stg_ytc_combined.js +15 -31
  6. package/includes/definitions/sources/stg_ytc_demographics.js +16 -31
  7. package/includes/definitions/sources/stg_ytc_device_os.js +16 -31
  8. package/includes/definitions/sources/stg_ytc_end_screens.js +14 -29
  9. package/includes/definitions/sources/stg_ytc_list_basic.js +15 -31
  10. package/includes/definitions/sources/stg_ytc_list_combined.js +16 -30
  11. package/includes/definitions/sources/stg_ytc_list_device_os.js +16 -30
  12. package/includes/definitions/sources/stg_ytc_list_playback.js +16 -30
  13. package/includes/definitions/sources/stg_ytc_list_province.js +15 -31
  14. package/includes/definitions/sources/stg_ytc_list_traffic_source.js +16 -30
  15. package/includes/definitions/sources/stg_ytc_lu_operating_systems.js +1 -1
  16. package/includes/definitions/sources/stg_ytc_playback.js +16 -31
  17. package/includes/definitions/sources/stg_ytc_province.js +15 -31
  18. package/includes/definitions/sources/stg_ytc_share_platform.js +16 -31
  19. package/includes/definitions/sources/stg_ytc_subtitles.js +15 -30
  20. package/includes/definitions/sources/stg_ytc_traffic_source.js +16 -49
  21. package/includes/definitions/ytc_annotation.js +14 -29
  22. package/includes/definitions/ytc_basic.js +14 -30
  23. package/includes/definitions/ytc_cards.js +14 -29
  24. package/includes/definitions/ytc_combined.js +14 -31
  25. package/includes/definitions/ytc_demographics.js +14 -28
  26. package/includes/definitions/ytc_demographics_views.js +15 -29
  27. package/includes/definitions/ytc_device_os.js +14 -28
  28. package/includes/definitions/ytc_end_screens.js +14 -28
  29. package/includes/definitions/ytc_list_basic.js +14 -28
  30. package/includes/definitions/ytc_list_combined.js +14 -27
  31. package/includes/definitions/ytc_list_device_os.js +14 -27
  32. package/includes/definitions/ytc_list_playback.js +14 -27
  33. package/includes/definitions/ytc_list_province.js +14 -28
  34. package/includes/definitions/ytc_list_traffic_source.js +14 -27
  35. package/includes/definitions/ytc_playback.js +14 -28
  36. package/includes/definitions/ytc_province.js +14 -28
  37. package/includes/definitions/ytc_share_platform.js +14 -28
  38. package/includes/definitions/ytc_subtitles.js +14 -27
  39. package/includes/definitions/ytc_traffic_source.js +14 -28
  40. package/includes/helpers.js +12 -0
  41. package/index.js +12 -2
  42. package/package.json +3 -3
@@ -1,95 +1,137 @@
1
- /**
2
- * Column Descriptions
3
- * Sets all variables that should be customized to the project.
4
- */
5
-
6
- const column_descriptions = {
7
- ad_impressions: "The number of verified ad impressions served.",
8
- ad_type: "The type of ad displayed (e.g., auction_display, auction_instream, reserved_instream).",
9
- age_group: "Age group of the logged-in viewers (e.g., 13-17, 18-24, 25-34, etc.).",
10
- annotation_click_through_rate: "The ratio of clicked annotations to total clickable annotation impressions (CTR).",
11
- annotation_clickable_impressions: "The number of annotation impressions that could be clicked.",
12
- annotation_clicks: "The total number of clicks on annotations.",
13
- annotation_closable_impressions: "The total number of closable annotation impressions displayed.",
14
- annotation_close_rate: "The ratio of closed annotations to total annotation impressions.",
15
- annotation_closes: "The number of times annotations were closed by users.",
16
- annotation_id: "Unique identifier for the annotation.",
17
- annotation_impressions: "The total number of annotation impressions displayed.",
18
- annotation_type: "Integer index indicating the type of annotation.",
19
- annotation_type_name: "String name of the annotation type.",
20
- average_view_duration: "The average duration (in seconds) of video playbacks.",
21
- average_view_duration_percentage: "The average percentage of a video watched during a playback.",
22
- average_view_duration_seconds: "The average duration in seconds that videos were watched.",
23
- card_click_rate: "The ratio of card clicks to card impressions (CTR).",
24
- card_clicks: "The number of times cards were clicked.",
25
- card_id: "Unique identifier for the card.",
26
- card_impressions: "The number of times cards were displayed (impressions).",
27
- card_teaser_click_rate: "The ratio of card teaser clicks to card teaser impressions (CTR).",
28
- card_teaser_clicks: "The number of times card teasers were clicked.",
29
- card_teaser_impressions: "The number of times card teasers were displayed (impressions).",
30
- card_type: "Integer index indicating the type of card.",
31
- card_type_name: "String name of the card type.",
32
- channel_id: "The ID of the YouTube channel associated with the metrics.",
33
- claimed_status: "Indicates whether the video content was claimed ('claimed' or 'unclaimed').",
34
- comments: "The number of times users commented on videos.",
35
- country_code: "The ISO 3166-1 alpha-2 country code where the interactions occurred.",
36
- cpm: "Estimated gross revenue per 1000 ad impressions (CPM, USD).",
37
- device_name: "String name of the device type.",
38
- device_type: "The type of device used by the viewer (e.g., DESKTOP, MOBILE, TABLET, TV, GAME_CONSOLE).",
39
- dislikes: "The number of times users disliked videos (negative ratings).",
40
- end_screen_element_id: "Unique identifier for the end screen element.",
41
- end_screen_element_type: "Integer index indicating the type of end screen element.",
42
- end_screen_element_type_name: "String name of the end screen element type.",
43
- estimated_ad_revenue: "Estimated net revenue (USD) specifically from Google-sold advertising sources (subject to month-end adjustment).",
44
- estimated_minutes_watched: "The estimated total number of minutes users watched videos.",
45
- estimated_monetized_playbacks: "The number of playbacks where at least one ad impression was shown (includes pre-rolls even if video wasn't watched).",
46
- estimated_red_minutes_watched: "The estimated number of minutes YouTube Premium members watched videos.",
47
- estimated_revenue: "Total estimated net revenue (USD) from Google-sold ads and non-ad sources (subject to month-end adjustment).",
48
- gender: "Gender of the logged-in viewers.",
49
- gross_revenue: "Estimated gross revenue (USD) from Google-sold or DoubleClick-partner-sold ads (before revenue shares/ownership splits, subject to month-end adjustment).",
50
- interaction_date: "Date of interaction in YYYY-MM-DD format, based on Pacific Time (PST/PDT).",
51
- likes: "The number of times users liked videos (positive ratings).",
52
- live_or_on_demand: "Indicates if the interaction was during a live broadcast ('LIVE') or with on-demand content ('ON_DEMAND').",
53
- monetized_playbacks: "DEPRECATED. Use estimated_monetized_playbacks instead.",
54
- operating_system: "The operating system used by the viewer (e.g., ANDROID, IOS, LINUX, MACINTOSH, WINDOWS).",
55
- operating_system_name: "String name of the operating system.",
56
- playback_based_cpm: "Estimated gross revenue per 1000 monetized playbacks (Playback-based CPM, USD).",
57
- playback_location: "Type of location where playback occured (text).",
58
- playback_location_type: "Integer index indicating type of location where playback occured.",
59
- playback_location_name: "String name of the playback location.",
60
- playlist_id: "Unique identifier for the YouTube playlist.",
61
- playlist_saves_net: "The number of times users saved videos to their playlists less the number of times they removed them.",
62
- playlist_starts: "Times the playlist was started.",
63
- playlist_title: "Title of the YouTube playlist.",
64
- province_code: "The ISO code for the province or state where the interactions occurred.",
65
- red_views: "The number of times YouTube Premium members viewed videos.",
66
- row_max_duration_seconds: "Maximum duration in seconds for the row's if all videos were played to the end. Provided simplify downstream calculation of percentage of videos watched.",
67
- shares: "The number of times users shared videos via the 'Share' button.",
68
- sharing_service: "The service used to share the video (e.g., FACEBOOK, WHATSAPP, REDDIT, EMAIL).",
69
- sharing_service_name: "String name of the sharing service.",
70
- subscribed_status: "Indicates whether the viewer was subscribed to the channel ('SUBSCRIBED' or 'UNSUBSCRIBED').",
71
- subscribers_gained: "The number of new subscribers gained.",
72
- subscribers_lost: "The number of subscribers lost (unsubscribes).",
73
- subscribers_net: "Net change in subscribers (subscribers gained minus subscribers lost).",
74
- subtitle_language: "The language code of the video subtitle.",
75
- subtitle_language_autotranslated: "Boolean indicating if the subtitle was automatically translated.",
76
- traffic_source_detail: "Specific details about the traffic source (e.g., search term, related video ID, external URL).",
77
- traffic_source_name: "String name of the traffic source type.",
78
- traffic_source_type: "The type of referrer through which viewers reached the video (e.g., YT_SEARCH, RELATED_VIDEO, EXT_URL, SUBSCRIBER).",
79
- uploader_type: "Indicates if metrics relate to content uploaded by the owner ('SELF') or claimed third-party content ('THIRD_PARTY').",
80
- video_id: "The ID of a YouTube video.",
81
- video_title: "Title of the YouTube video.",
82
- videos_added_to_playlists: "The number of times videos were added to any YouTube playlist (owner's or others').",
83
- videos_removed_from_playlists: "The number of times videos were removed from any YouTube playlist (owner's or others').",
84
- views: "The total number of times videos were viewed. In playlist reports, counts views within the playlist context.",
85
- views_percentage: "Percentage of total views for the time period that were logged-in.",
86
- views_with_demographics: "Number of views from users with demographic information available.",
87
- watch_time_minutes: "The number of minutes that users watched videos.",
88
- youtube_product: "The YouTube property where the interaction occurred (e.g., CORE, GAMING, KIDS, MUSIC)."
89
- };
90
-
91
-
92
- // Export project vars
93
- module.exports = {
94
- column_descriptions
95
- };
1
+ /**
2
+ * Column Descriptions
3
+ * Sets all variables that should be customized to the project.
4
+ */
5
+
6
+ const column_descriptions = {
7
+ ad_impressions: "The number of verified ad impressions served.",
8
+ ad_type:
9
+ "The type of ad displayed (e.g., auction_display, auction_instream, reserved_instream).",
10
+ age_group:
11
+ "Age group of the logged-in viewers (e.g., 13-17, 18-24, 25-34, etc.).",
12
+ annotation_click_through_rate:
13
+ "The ratio of clicked annotations to total clickable annotation impressions (CTR).",
14
+ annotation_clickable_impressions:
15
+ "The number of annotation impressions that could be clicked.",
16
+ annotation_clicks: "The total number of clicks on annotations.",
17
+ annotation_closable_impressions:
18
+ "The total number of closable annotation impressions displayed.",
19
+ annotation_close_rate:
20
+ "The ratio of closed annotations to total annotation impressions.",
21
+ annotation_closes: "The number of times annotations were closed by users.",
22
+ annotation_id: "Unique identifier for the annotation.",
23
+ annotation_impressions:
24
+ "The total number of annotation impressions displayed.",
25
+ annotation_type: "Integer index indicating the type of annotation.",
26
+ annotation_type_name: "String name of the annotation type.",
27
+ average_view_duration:
28
+ "The average duration (in seconds) of video playbacks.",
29
+ average_view_duration_percentage:
30
+ "The average percentage of a video watched during a playback.",
31
+ average_view_duration_seconds:
32
+ "The average duration in seconds that videos were watched.",
33
+ card_click_rate: "The ratio of card clicks to card impressions (CTR).",
34
+ card_clicks: "The number of times cards were clicked.",
35
+ card_id: "Unique identifier for the card.",
36
+ card_impressions: "The number of times cards were displayed (impressions).",
37
+ card_teaser_click_rate:
38
+ "The ratio of card teaser clicks to card teaser impressions (CTR).",
39
+ card_teaser_clicks: "The number of times card teasers were clicked.",
40
+ card_teaser_impressions:
41
+ "The number of times card teasers were displayed (impressions).",
42
+ card_type: "Integer index indicating the type of card.",
43
+ card_type_name: "String name of the card type.",
44
+ channel_id: "The ID of the YouTube channel associated with the metrics.",
45
+ claimed_status:
46
+ "Indicates whether the video content was claimed ('claimed' or 'unclaimed').",
47
+ comments: "The number of times users commented on videos.",
48
+ country_code:
49
+ "The ISO 3166-1 alpha-2 country code where the interactions occurred.",
50
+ cpm: "Estimated gross revenue per 1000 ad impressions (CPM, USD).",
51
+ device_name: "String name of the device type.",
52
+ device_type:
53
+ "The type of device used by the viewer (e.g., DESKTOP, MOBILE, TABLET, TV, GAME_CONSOLE).",
54
+ dislikes: "The number of times users disliked videos (negative ratings).",
55
+ end_screen_element_id: "Unique identifier for the end screen element.",
56
+ end_screen_element_type:
57
+ "Integer index indicating the type of end screen element.",
58
+ end_screen_element_type_name: "String name of the end screen element type.",
59
+ estimated_ad_revenue:
60
+ "Estimated net revenue (USD) specifically from Google-sold advertising sources (subject to month-end adjustment).",
61
+ estimated_minutes_watched:
62
+ "The estimated total number of minutes users watched videos.",
63
+ estimated_monetized_playbacks:
64
+ "The number of playbacks where at least one ad impression was shown (includes pre-rolls even if video wasn't watched).",
65
+ estimated_red_minutes_watched:
66
+ "The estimated number of minutes YouTube Premium members watched videos.",
67
+ estimated_revenue:
68
+ "Total estimated net revenue (USD) from Google-sold ads and non-ad sources (subject to month-end adjustment).",
69
+ gender: "Gender of the logged-in viewers.",
70
+ gross_revenue:
71
+ "Estimated gross revenue (USD) from Google-sold or DoubleClick-partner-sold ads (before revenue shares/ownership splits, subject to month-end adjustment).",
72
+ interaction_date:
73
+ "Date of interaction in YYYY-MM-DD format, based on Pacific Time (PST/PDT).",
74
+ likes: "The number of times users liked videos (positive ratings).",
75
+ live_or_on_demand:
76
+ "Indicates if the interaction was during a live broadcast ('LIVE') or with on-demand content ('ON_DEMAND').",
77
+ monetized_playbacks: "DEPRECATED. Use estimated_monetized_playbacks instead.",
78
+ operating_system:
79
+ "The operating system used by the viewer (e.g., ANDROID, IOS, LINUX, MACINTOSH, WINDOWS).",
80
+ operating_system_name: "String name of the operating system.",
81
+ playback_based_cpm:
82
+ "Estimated gross revenue per 1000 monetized playbacks (Playback-based CPM, USD).",
83
+ playback_location: "Type of location where playback occured (text).",
84
+ playback_location_type:
85
+ "Integer index indicating type of location where playback occured.",
86
+ playback_location_name: "String name of the playback location.",
87
+ playlist_id: "Unique identifier for the YouTube playlist.",
88
+ playlist_saves_net:
89
+ "The number of times users saved videos to their playlists less the number of times they removed them.",
90
+ playlist_starts: "Times the playlist was started.",
91
+ playlist_title: "Title of the YouTube playlist.",
92
+ province_code:
93
+ "The ISO code for the province or state where the interactions occurred.",
94
+ red_views: "The number of times YouTube Premium members viewed videos.",
95
+ row_max_duration_seconds:
96
+ "Maximum duration in seconds for the row's if all videos were played to the end. Provided simplify downstream calculation of percentage of videos watched.",
97
+ shares: "The number of times users shared videos via the 'Share' button.",
98
+ sharing_service:
99
+ "The service used to share the video (e.g., FACEBOOK, WHATSAPP, REDDIT, EMAIL).",
100
+ sharing_service_name: "String name of the sharing service.",
101
+ subscribed_status:
102
+ "Indicates whether the viewer was subscribed to the channel ('SUBSCRIBED' or 'UNSUBSCRIBED').",
103
+ subscribers_gained: "The number of new subscribers gained.",
104
+ subscribers_lost: "The number of subscribers lost (unsubscribes).",
105
+ subscribers_net:
106
+ "Net change in subscribers (subscribers gained minus subscribers lost).",
107
+ subtitle_language: "The language code of the video subtitle.",
108
+ subtitle_language_autotranslated:
109
+ "Boolean indicating if the subtitle was automatically translated.",
110
+ traffic_source_detail:
111
+ "Specific details about the traffic source (e.g., search term, related video ID, external URL).",
112
+ traffic_source_name: "String name of the traffic source type.",
113
+ traffic_source_type:
114
+ "The type of referrer through which viewers reached the video (e.g., YT_SEARCH, RELATED_VIDEO, EXT_URL, SUBSCRIBER).",
115
+ uploader_type:
116
+ "Indicates if metrics relate to content uploaded by the owner ('SELF') or claimed third-party content ('THIRD_PARTY').",
117
+ video_id: "The ID of a YouTube video.",
118
+ video_title: "Title of the YouTube video.",
119
+ videos_added_to_playlists:
120
+ "The number of times videos were added to any YouTube playlist (owner's or others').",
121
+ videos_removed_from_playlists:
122
+ "The number of times videos were removed from any YouTube playlist (owner's or others').",
123
+ views:
124
+ "The total number of times videos were viewed. In playlist reports, counts views within the playlist context.",
125
+ views_percentage:
126
+ "Percentage of total views for the time period that were logged-in.",
127
+ views_with_demographics:
128
+ "Number of views from users with demographic information available.",
129
+ watch_time_minutes: "The number of minutes that users watched videos.",
130
+ youtube_product:
131
+ "The YouTube property where the interaction occurred (e.g., CORE, GAMING, KIDS, MUSIC).",
132
+ };
133
+
134
+ // Export project vars
135
+ module.exports = {
136
+ column_descriptions,
137
+ };
@@ -2,23 +2,25 @@
2
2
  * Copyright (C) 2025 by KEN-E, LLC
3
3
  */
4
4
 
5
+ const helpers = require("../../helpers");
6
+
5
7
  module.exports = (config) => {
6
8
  // eslint-disable-next-line no-undef
7
9
  return publish("stg_ytc_annotation", {
8
10
  type: "incremental",
9
11
  database: config.target.database,
10
- schema: config.datasetStaging,
12
+ schema: config.target.datasetStaging,
11
13
  protected: config.protected,
12
14
  tags: ["youtube", "source", "staging", "daily"],
13
15
  bigquery: {
14
- partitionBy: "interaction_date",
16
+ partitionBy: "data_date",
15
17
  clusterBy: ["video_id", "annotation_type"],
16
18
  },
17
19
  assertions: {
18
20
  // make sure rows have unique dimensions
19
21
  uniqueKeys: [
20
22
  [
21
- "interaction_date",
23
+ "data_date",
22
24
  "channel_id",
23
25
  "video_id",
24
26
  "live_or_on_demand",
@@ -29,42 +31,21 @@ module.exports = (config) => {
29
31
  ],
30
32
  ],
31
33
  // make sure source partition and data dates match
32
- rowConditions: ["interaction_date = source_partition_date"],
34
+ // rowConditions: ["interaction_date = source_partition_date"],
33
35
  },
34
36
  description: "Staging table for YouTube Channel Annotation data",
35
37
  })
36
- .preOps(
37
- (ctx) => `
38
- declare source_date_checkpoint default (
39
- select date("${config.startDate}")
40
- );
41
-
42
- --Set the incremental update checkpoint based on current max partition value minus lookback.
43
- set source_date_checkpoint = (
44
- ${ctx.when(
45
- ctx.incremental(),
46
- `select
47
- least(
48
- (select date_sub(current_date(), interval ${config.daysBack} day)),
49
- (select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
50
- )`,
51
- `select date("${config.startDate}")`,
52
- )}
53
- );
54
-
55
- ${ctx.when(
56
- ctx.incremental(),
57
- `delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
58
- )}
59
- `,
60
- )
38
+ .preOps((ctx) => {
39
+ // Get pre_operations to find updated source partitions
40
+ return `${helpers.staging_preops(ctx, config, "stg_ytc_annotation")}`;
41
+ })
61
42
  .query((ctx) =>
62
43
  config.sources
63
44
  .map((t) => {
64
45
  return `
65
46
  select
66
47
  _PARTITIONDATE as source_partition_date,
67
- parse_date('%Y%m%d', date) as interaction_date,
48
+ parse_date('%Y%m%d', date) as data_date,
68
49
  "${t.schema}" as site_nm,
69
50
  current_timestamp() as updated_at,
70
51
  channel_id,
@@ -82,7 +63,11 @@ select
82
63
  annotation_clicks,
83
64
  annotation_closes
84
65
  from ${ctx.ref(t.database, t.schema, "p_channel_annotations_a1_" + t.suffix)}
85
- where _PARTITIONDATE > source_date_checkpoint
66
+ ${ctx.when(
67
+ ctx.incremental(),
68
+ `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
69
+ `where date(_PARTITIONTIME) > date(self_checkpoint)`,
70
+ )}
86
71
 
87
72
  `;
88
73
  })
@@ -3,24 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../../column_descriptions");
6
+ const helpers = require("../../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("stg_ytc_basic", {
10
11
  type: "incremental",
11
12
  database: config.target.database,
12
- schema: config.datasetStaging,
13
+ schema: config.target.datasetStaging,
13
14
  protected: config.protected,
14
15
  tags: ["youtube", "source", "staging", "daily"],
15
16
  bigquery: {
16
- partitionBy: "interaction_date",
17
+ partitionBy: "data_date",
17
18
  clusterBy: ["video_id"],
18
19
  },
19
20
  assertions: {
20
21
  // make sure rows have unique dimensions
21
22
  uniqueKeys: [
22
23
  [
23
- "interaction_date",
24
+ "data_date",
24
25
  "channel_id",
25
26
  "video_id",
26
27
  "live_or_on_demand",
@@ -29,43 +30,22 @@ module.exports = (config) => {
29
30
  ],
30
31
  ],
31
32
  // make sure source partition and data dates match
32
- rowConditions: ["interaction_date = source_partition_date"],
33
+ // rowConditions: ["interaction_date = source_partition_date"],
33
34
  },
34
35
  columns: column_descriptions.column_descriptions,
35
36
  description: "YT Channel Basic Report Table - Staging",
36
37
  })
37
- .preOps(
38
- (ctx) => `
39
- declare source_date_checkpoint default (
40
- select date("${config.startDate}")
41
- );
42
-
43
- --Set the incremental update checkpoint based on current max partition value minus lookback.
44
- set source_date_checkpoint = (
45
- ${ctx.when(
46
- ctx.incremental(),
47
- `select
48
- least(
49
- (select date_sub(current_date(), interval ${config.daysBack} day)),
50
- (select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
51
- )`,
52
- `select date("${config.startDate}")`,
53
- )}
54
- );
55
-
56
- ${ctx.when(
57
- ctx.incremental(),
58
- `delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
59
- )}
60
- `,
61
- )
38
+ .preOps((ctx) => {
39
+ // Get pre_operations to find updated source partitions
40
+ return `${helpers.staging_preops(ctx, config, "stg_ytc_basic")}`;
41
+ })
62
42
  .query((ctx) =>
63
43
  config.sources
64
44
  .map((t) => {
65
45
  return `
66
46
  select
67
47
  _PARTITIONDATE as source_partition_date,
68
- parse_date('%Y%m%d',date) as interaction_date,
48
+ parse_date('%Y%m%d', date) as data_date,
69
49
  "${t.schema}" as site_nm,
70
50
  current_timestamp() as updated_at,
71
51
  channel_id,
@@ -101,7 +81,11 @@ select
101
81
  red_views,
102
82
  red_watch_time_minutes,
103
83
  from ${ctx.ref(t.database, t.schema, "p_channel_basic_a2_" + t.suffix)}
104
- where _PARTITIONDATE > source_date_checkpoint
84
+ ${ctx.when(
85
+ ctx.incremental(),
86
+ `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
87
+ `where date(_PARTITIONTIME) > date(self_checkpoint)`,
88
+ )}
105
89
  `;
106
90
  })
107
91
  .join(" union all "),
@@ -2,23 +2,25 @@
2
2
  * Copyright (C) 2025 by KEN-E, LLC
3
3
  */
4
4
 
5
+ const helpers = require("../../helpers");
6
+
5
7
  module.exports = (config) => {
6
8
  // eslint-disable-next-line no-undef
7
9
  return publish("stg_ytc_cards", {
8
10
  type: "incremental",
9
11
  database: config.target.database,
10
- schema: config.datasetStaging,
12
+ schema: config.target.datasetStaging,
11
13
  protected: config.protected,
12
14
  tags: ["youtube", "source", "staging", "daily"],
13
15
  bigquery: {
14
- partitionBy: "interaction_date",
16
+ partitionBy: "data_date",
15
17
  clusterBy: ["video_id", "card_type"],
16
18
  },
17
19
  assertions: {
18
20
  // make sure rows have unique dimensions
19
21
  uniqueKeys: [
20
22
  [
21
- "interaction_date",
23
+ "data_date",
22
24
  "channel_id",
23
25
  "video_id",
24
26
  "live_or_on_demand",
@@ -29,42 +31,21 @@ module.exports = (config) => {
29
31
  ],
30
32
  ],
31
33
  // make sure source partition and data dates match
32
- rowConditions: ["interaction_date = source_partition_date"],
34
+ // rowConditions: ["interaction_date = source_partition_date"],
33
35
  },
34
36
  description: "Staging table for YouTube Channel Cards data",
35
37
  })
36
- .preOps(
37
- (ctx) => `
38
- declare source_date_checkpoint default (
39
- select date("${config.startDate}")
40
- );
41
-
42
- --Set the incremental update checkpoint based on current max partition value minus lookback.
43
- set source_date_checkpoint = (
44
- ${ctx.when(
45
- ctx.incremental(),
46
- `select
47
- least(
48
- (select date_sub(current_date(), interval ${config.daysBack} day)),
49
- (select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
50
- )`,
51
- `select date("${config.startDate}")`,
52
- )}
53
- );
54
-
55
- ${ctx.when(
56
- ctx.incremental(),
57
- `delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
58
- )}
59
- `,
60
- )
38
+ .preOps((ctx) => {
39
+ // Get pre_operations to find updated source partitions
40
+ return `${helpers.staging_preops(ctx, config, "stg_ytc_cards")}`;
41
+ })
61
42
  .query((ctx) =>
62
43
  config.sources
63
44
  .map((t) => {
64
45
  return `
65
46
  select
66
47
  _PARTITIONDATE as source_partition_date,
67
- parse_date('%Y%m%d', date) as interaction_date,
48
+ parse_date('%Y%m%d', date) as data_date,
68
49
  "${t.schema}" as site_nm,
69
50
  current_timestamp() as updated_at,
70
51
  channel_id,
@@ -81,7 +62,11 @@ select
81
62
  card_clicks,
82
63
  card_teaser_clicks
83
64
  from ${ctx.ref(t.database, t.schema, "p_channel_cards_a1_" + t.suffix)}
84
- where _PARTITIONDATE > source_date_checkpoint
65
+ ${ctx.when(
66
+ ctx.incremental(),
67
+ `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
68
+ `where date(_PARTITIONTIME) > date(self_checkpoint)`,
69
+ )}
85
70
  `;
86
71
  })
87
72
  .join(" union all "),
@@ -3,24 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../../column_descriptions");
6
+ const helpers = require("../../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("stg_ytc_combined", {
10
11
  type: "incremental",
11
12
  database: config.target.database,
12
- schema: config.datasetStaging,
13
+ schema: config.target.datasetStaging,
13
14
  protected: config.protected,
14
15
  tags: ["youtube", "source", "staging", "daily"],
15
16
  bigquery: {
16
- partitionBy: "interaction_date",
17
+ partitionBy: "data_date",
17
18
  clusterBy: ["video_id"],
18
19
  },
19
20
  assertions: {
20
21
  // make sure rows have unique dimensions
21
22
  uniqueKeys: [
22
23
  [
23
- "interaction_date",
24
+ "data_date",
24
25
  "channel_id",
25
26
  "video_id",
26
27
  "live_or_on_demand",
@@ -33,36 +34,15 @@ module.exports = (config) => {
33
34
  ],
34
35
  ],
35
36
  // make sure source partition and data dates match
36
- rowConditions: ["interaction_date = source_partition_date"],
37
+ // rowConditions: ["interaction_date = source_partition_date"],
37
38
  },
38
39
  columns: column_descriptions.column_descriptions,
39
40
  description: "YT Channel Combined Report Table - Staging",
40
41
  })
41
- .preOps(
42
- (ctx) => `
43
- declare source_date_checkpoint default (
44
- select date("${config.startDate}")
45
- );
46
-
47
- --Set the incremental update checkpoint based on current max partition value minus lookback.
48
- set source_date_checkpoint = (
49
- ${ctx.when(
50
- ctx.incremental(),
51
- `select
52
- least(
53
- (select date_sub(current_date(), interval ${config.daysBack} day)),
54
- (select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
55
- )`,
56
- `select date("${config.startDate}")`,
57
- )}
58
- );
59
-
60
- ${ctx.when(
61
- ctx.incremental(),
62
- `delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
63
- )}
64
- `,
65
- )
42
+ .preOps((ctx) => {
43
+ // Get pre_operations to find updated source partitions
44
+ return `${helpers.staging_preops(ctx, config, "stg_ytc_combined")}`;
45
+ })
66
46
  .query((ctx) =>
67
47
  config.sources
68
48
  .map((t) => {
@@ -70,7 +50,7 @@ declare source_date_checkpoint default (
70
50
 
71
51
  select
72
52
  _PARTITIONDATE as source_partition_date,
73
- parse_date('%Y%m%d',date) as interaction_date,
53
+ parse_date('%Y%m%d', date) as data_date,
74
54
  "${t.schema}" as site_nm,
75
55
  current_timestamp() as updated_at,
76
56
  channel_id,
@@ -89,7 +69,11 @@ select
89
69
  red_views,
90
70
  red_watch_time_minutes,
91
71
  from ${ctx.ref(t.database, t.schema, "p_channel_combined_a2_" + t.suffix)}
92
- where _PARTITIONDATE > source_date_checkpoint
72
+ ${ctx.when(
73
+ ctx.incremental(),
74
+ `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
75
+ `where date(_PARTITIONTIME) > date(self_checkpoint)`,
76
+ )}
93
77
  `;
94
78
  })
95
79
  .join(" union all "),