@ken-e/dataform-youtube 0.0.2 → 0.0.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/includes/column_descriptions.js +137 -95
  2. package/includes/definitions/sources/stg_ytc_annotation.js +15 -30
  3. package/includes/definitions/sources/stg_ytc_basic.js +14 -30
  4. package/includes/definitions/sources/stg_ytc_cards.js +15 -30
  5. package/includes/definitions/sources/stg_ytc_combined.js +14 -30
  6. package/includes/definitions/sources/stg_ytc_demographics.js +15 -30
  7. package/includes/definitions/sources/stg_ytc_device_os.js +15 -30
  8. package/includes/definitions/sources/stg_ytc_end_screens.js +13 -28
  9. package/includes/definitions/sources/stg_ytc_list_basic.js +14 -30
  10. package/includes/definitions/sources/stg_ytc_list_combined.js +15 -29
  11. package/includes/definitions/sources/stg_ytc_list_device_os.js +15 -29
  12. package/includes/definitions/sources/stg_ytc_list_playback.js +15 -29
  13. package/includes/definitions/sources/stg_ytc_list_province.js +14 -30
  14. package/includes/definitions/sources/stg_ytc_list_traffic_source.js +15 -29
  15. package/includes/definitions/sources/stg_ytc_lu_operating_systems.js +1 -1
  16. package/includes/definitions/sources/stg_ytc_playback.js +15 -30
  17. package/includes/definitions/sources/stg_ytc_province.js +14 -30
  18. package/includes/definitions/sources/stg_ytc_share_platform.js +15 -30
  19. package/includes/definitions/sources/stg_ytc_subtitles.js +14 -29
  20. package/includes/definitions/sources/stg_ytc_traffic_source.js +15 -48
  21. package/includes/definitions/ytc_annotation.js +14 -29
  22. package/includes/definitions/ytc_basic.js +14 -30
  23. package/includes/definitions/ytc_cards.js +14 -29
  24. package/includes/definitions/ytc_combined.js +14 -31
  25. package/includes/definitions/ytc_demographics.js +14 -28
  26. package/includes/definitions/ytc_demographics_views.js +15 -29
  27. package/includes/definitions/ytc_device_os.js +14 -28
  28. package/includes/definitions/ytc_end_screens.js +14 -28
  29. package/includes/definitions/ytc_list_basic.js +14 -28
  30. package/includes/definitions/ytc_list_combined.js +14 -27
  31. package/includes/definitions/ytc_list_device_os.js +14 -27
  32. package/includes/definitions/ytc_list_playback.js +14 -27
  33. package/includes/definitions/ytc_list_province.js +14 -28
  34. package/includes/definitions/ytc_list_traffic_source.js +14 -27
  35. package/includes/definitions/ytc_playback.js +14 -28
  36. package/includes/definitions/ytc_province.js +14 -28
  37. package/includes/definitions/ytc_share_platform.js +14 -28
  38. package/includes/definitions/ytc_subtitles.js +14 -27
  39. package/includes/definitions/ytc_traffic_source.js +14 -28
  40. package/includes/helpers.js +12 -0
  41. package/index.js +12 -2
  42. package/package.json +3 -3
@@ -2,6 +2,8 @@
2
2
  * Copyright (C) 2025 by KEN-E, LLC
3
3
  */
4
4
 
5
+ const helpers = require("../../helpers");
6
+
5
7
  module.exports = (config) => {
6
8
  // eslint-disable-next-line no-undef
7
9
  return publish("stg_ytc_list_traffic_source", {
@@ -11,13 +13,13 @@ module.exports = (config) => {
11
13
  protected: config.protected,
12
14
  tags: ["youtube", "source", "staging", "daily"],
13
15
  bigquery: {
14
- partitionBy: "interaction_date",
16
+ partitionBy: "data_date",
15
17
  clusterBy: ["playlist_id", "video_id", "traffic_source_type"],
16
18
  },
17
19
  assertions: {
18
20
  uniqueKeys: [
19
21
  [
20
- "interaction_date",
22
+ "data_date",
21
23
  "channel_id",
22
24
  "playlist_id",
23
25
  "video_id",
@@ -28,42 +30,22 @@ module.exports = (config) => {
28
30
  "traffic_source_detail",
29
31
  ],
30
32
  ],
31
- rowConditions: ["interaction_date = source_partition_date"],
33
+ // rowConditions: ["interaction_date = source_partition_date"],
32
34
  },
33
35
  description:
34
36
  "Staging table for YouTube Channel Playlist Traffic Source data",
35
37
  })
36
- .preOps(
37
- (ctx) => `
38
- declare source_date_checkpoint default (
39
- select date("${config.startDate}")
40
- );
41
-
42
- set source_date_checkpoint = (
43
- ${ctx.when(
44
- ctx.incremental(),
45
- `select
46
- least(
47
- (select date_sub(current_date(), interval ${config.daysBack} day)),
48
- (select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
49
- )`,
50
- `select date("${config.startDate}")`,
51
- )}
52
- );
53
-
54
- ${ctx.when(
55
- ctx.incremental(),
56
- `delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
57
- )}
58
- `,
59
- )
38
+ .preOps((ctx) => {
39
+ // Get pre_operations to find updated source partitions
40
+ return `${helpers.staging_preops(ctx, config, "stg_ytc_list_traffic_source")}`;
41
+ })
60
42
  .query((ctx) =>
61
43
  config.sources
62
44
  .map((t) => {
63
45
  return `
64
46
  select
65
47
  _PARTITIONDATE as source_partition_date,
66
- parse_date('%Y%m%d', date) as interaction_date,
48
+ parse_date('%Y%m%d', date) as data_date,
67
49
  "${t.schema}" as site_nm,
68
50
  current_timestamp() as updated_at,
69
51
  channel_id,
@@ -81,7 +63,11 @@ select
81
63
  playlist_saves_added,
82
64
  playlist_saves_removed
83
65
  from ${ctx.ref(t.database, t.schema, "p_playlist_traffic_source_a1_" + t.suffix)}
84
- where _PARTITIONDATE > source_date_checkpoint
66
+ ${ctx.when(
67
+ ctx.incremental(),
68
+ `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
69
+ `where date(_PARTITIONTIME) > date(self_checkpoint)`,
70
+ )}
85
71
  `;
86
72
  })
87
73
  .join(" union all "),
@@ -13,7 +13,7 @@ module.exports = (config) => {
13
13
  description:
14
14
  "Lookup table for operating system types and their names from YouTube Analytics API.",
15
15
  }).query(
16
- (ctx) =>
16
+ () =>
17
17
  `
18
18
 
19
19
  select
@@ -2,6 +2,8 @@
2
2
  * Copyright (C) 2025 by KEN-E, LLC
3
3
  */
4
4
 
5
+ const helpers = require("../../helpers");
6
+
5
7
  module.exports = (config) => {
6
8
  // eslint-disable-next-line no-undef
7
9
  return publish("stg_ytc_playback", {
@@ -11,14 +13,14 @@ module.exports = (config) => {
11
13
  protected: config.protected,
12
14
  tags: ["youtube", "source", "staging", "daily"],
13
15
  bigquery: {
14
- partitionBy: "interaction_date",
16
+ partitionBy: "data_date",
15
17
  clusterBy: ["video_id", "playback_location_type"],
16
18
  },
17
19
  assertions: {
18
20
  // make sure rows have unique dimensions
19
21
  uniqueKeys: [
20
22
  [
21
- "interaction_date",
23
+ "data_date",
22
24
  "channel_id",
23
25
  "video_id",
24
26
  "live_or_on_demand",
@@ -29,35 +31,14 @@ module.exports = (config) => {
29
31
  ],
30
32
  ],
31
33
  // make sure source partition and data dates match
32
- rowConditions: ["interaction_date = source_partition_date"],
34
+ // rowConditions: ["interaction_date = source_partition_date"],
33
35
  },
34
36
  description: "Staging table for YouTube Channel Playback Location data",
35
37
  })
36
- .preOps(
37
- (ctx) => `
38
- declare source_date_checkpoint default (
39
- select date("${config.startDate}")
40
- );
41
-
42
- --Set the incremental update checkpoint based on current max partition value minus lookback.
43
- set source_date_checkpoint = (
44
- ${ctx.when(
45
- ctx.incremental(),
46
- `select
47
- least(
48
- (select date_sub(current_date(), interval ${config.daysBack} day)),
49
- (select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
50
- )`,
51
- `select date("${config.startDate}")`,
52
- )}
53
- );
54
-
55
- ${ctx.when(
56
- ctx.incremental(),
57
- `delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
58
- )}
59
- `,
60
- )
38
+ .preOps((ctx) => {
39
+ // Get pre_operations to find updated source partitions
40
+ return `${helpers.staging_preops(ctx, config, "stg_ytc_playback")}`;
41
+ })
61
42
  .query((ctx) =>
62
43
  config.sources
63
44
  .map((t) => {
@@ -65,7 +46,7 @@ declare source_date_checkpoint default (
65
46
 
66
47
  select
67
48
  _PARTITIONDATE as source_partition_date,
68
- parse_date('%Y%m%d', date) as interaction_date,
49
+ parse_date('%Y%m%d', date) as data_date,
69
50
  "${t.schema}" as site_nm,
70
51
  current_timestamp() as updated_at,
71
52
  channel_id,
@@ -82,7 +63,11 @@ select
82
63
  red_views,
83
64
  red_watch_time_minutes
84
65
  from ${ctx.ref(t.database, t.schema, "p_channel_playback_location_a2_" + t.suffix)}
85
- where _PARTITIONDATE > source_date_checkpoint
66
+ ${ctx.when(
67
+ ctx.incremental(),
68
+ `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
69
+ `where date(_PARTITIONTIME) > date(self_checkpoint)`,
70
+ )}
86
71
  `;
87
72
  })
88
73
  .join(" union all "),
@@ -3,6 +3,7 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../../column_descriptions");
6
+ const helpers = require("../../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
@@ -13,14 +14,14 @@ module.exports = (config) => {
13
14
  protected: config.protected,
14
15
  tags: ["youtube", "source", "staging", "daily"],
15
16
  bigquery: {
16
- partitionBy: "interaction_date",
17
+ partitionBy: "data_date",
17
18
  clusterBy: ["video_id"],
18
19
  },
19
20
  assertions: {
20
21
  // make sure rows have unique dimensions
21
22
  uniqueKeys: [
22
23
  [
23
- "interaction_date",
24
+ "data_date",
24
25
  "channel_id",
25
26
  "video_id",
26
27
  "live_or_on_demand",
@@ -30,43 +31,22 @@ module.exports = (config) => {
30
31
  ],
31
32
  ],
32
33
  // make sure source partition and data dates match
33
- rowConditions: ["interaction_date = source_partition_date"],
34
+ // rowConditions: ["interaction_date = source_partition_date"],
34
35
  },
35
36
  columns: column_descriptions.column_descriptions,
36
37
  description: "YT Channel Province Report Table - Staging",
37
38
  })
38
- .preOps(
39
- (ctx) => `
40
- declare source_date_checkpoint default (
41
- select date("${config.startDate}")
42
- );
43
-
44
- --Set the incremental update checkpoint based on current max partition value minus lookback.
45
- set source_date_checkpoint = (
46
- ${ctx.when(
47
- ctx.incremental(),
48
- `select
49
- least(
50
- (select date_sub(current_date(), interval ${config.daysBack} day)),
51
- (select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
52
- )`,
53
- `select date("${config.startDate}")`,
54
- )}
55
- );
56
-
57
- ${ctx.when(
58
- ctx.incremental(),
59
- `delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
60
- )}
61
- `,
62
- )
39
+ .preOps((ctx) => {
40
+ // Get pre_operations to find updated source partitions
41
+ return `${helpers.staging_preops(ctx, config, "stg_ytc_province")}`;
42
+ })
63
43
  .query((ctx) =>
64
44
  config.sources
65
45
  .map((t) => {
66
46
  return `
67
47
  select
68
48
  _PARTITIONDATE as source_partition_date,
69
- parse_date('%Y%m%d',date) as interaction_date,
49
+ parse_date('%Y%m%d', date) as data_date,
70
50
  "${t.schema}" as site_nm,
71
51
  current_timestamp() as updated_at,
72
52
  channel_id,
@@ -95,7 +75,11 @@ select
95
75
  red_views,
96
76
  red_watch_time_minutes
97
77
  from ${ctx.ref(t.database, t.schema, "p_channel_province_a2_" + t.suffix)}
98
- where _PARTITIONDATE > source_date_checkpoint
78
+ ${ctx.when(
79
+ ctx.incremental(),
80
+ `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
81
+ `where date(_PARTITIONTIME) > date(self_checkpoint)`,
82
+ )}
99
83
  `;
100
84
  })
101
85
  .join(" union all "),
@@ -2,6 +2,8 @@
2
2
  * Copyright (C) 2025 by KEN-E, LLC
3
3
  */
4
4
 
5
+ const helpers = require("../../helpers");
6
+
5
7
  module.exports = (config) => {
6
8
  // eslint-disable-next-line no-undef
7
9
  return publish("stg_ytc_share_platform", {
@@ -11,14 +13,14 @@ module.exports = (config) => {
11
13
  protected: config.protected,
12
14
  tags: ["youtube", "source", "staging", "daily"],
13
15
  bigquery: {
14
- partitionBy: "interaction_date",
16
+ partitionBy: "data_date",
15
17
  clusterBy: ["video_id", "sharing_service"],
16
18
  },
17
19
  assertions: {
18
20
  // make sure rows have unique dimensions
19
21
  uniqueKeys: [
20
22
  [
21
- "interaction_date",
23
+ "data_date",
22
24
  "channel_id",
23
25
  "video_id",
24
26
  "live_or_on_demand",
@@ -28,42 +30,21 @@ module.exports = (config) => {
28
30
  ],
29
31
  ],
30
32
  // make sure source partition and data dates match
31
- rowConditions: ["interaction_date = source_partition_date"],
33
+ // rowConditions: ["interaction_date = source_partition_date"],
32
34
  },
33
35
  description: "Staging table for YouTube Channel Share Platform data",
34
36
  })
35
- .preOps(
36
- (ctx) => `
37
- declare source_date_checkpoint default (
38
- select date("${config.startDate}")
39
- );
40
-
41
- --Set the incremental update checkpoint based on current max partition value minus lookback.
42
- set source_date_checkpoint = (
43
- ${ctx.when(
44
- ctx.incremental(),
45
- `select
46
- least(
47
- (select date_sub(current_date(), interval ${config.daysBack} day)),
48
- (select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
49
- )`,
50
- `select date("${config.startDate}")`,
51
- )}
52
- );
53
-
54
- ${ctx.when(
55
- ctx.incremental(),
56
- `delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
57
- )}
58
- `,
59
- )
37
+ .preOps((ctx) => {
38
+ // Get pre_operations to find updated source partitions
39
+ return `${helpers.staging_preops(ctx, config, "stg_ytc_share_platform")}`;
40
+ })
60
41
  .query((ctx) =>
61
42
  config.sources
62
43
  .map((t) => {
63
44
  return `
64
45
  select
65
46
  _PARTITIONDATE as source_partition_date,
66
- parse_date('%Y%m%d', date) as interaction_date,
47
+ parse_date('%Y%m%d', date) as data_date,
67
48
  "${t.schema}" as site_nm,
68
49
  current_timestamp() as updated_at,
69
50
  channel_id,
@@ -74,7 +55,11 @@ select
74
55
  cast(sharing_service as int) as sharing_service,
75
56
  shares
76
57
  from ${ctx.ref(t.database, t.schema, "p_channel_sharing_service_a1_" + t.suffix)}
77
- where _PARTITIONDATE > source_date_checkpoint
58
+ ${ctx.when(
59
+ ctx.incremental(),
60
+ `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
61
+ `where date(_PARTITIONTIME) > date(self_checkpoint)`,
62
+ )}
78
63
  `;
79
64
  })
80
65
  .join(" union all "),
@@ -3,6 +3,7 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../../column_descriptions");
6
+ const helpers = require("../../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
@@ -13,13 +14,13 @@ module.exports = (config) => {
13
14
  protected: config.protected,
14
15
  tags: ["youtube", "source", "staging", "daily"],
15
16
  bigquery: {
16
- partitionBy: "interaction_date",
17
+ partitionBy: "data_date",
17
18
  clusterBy: ["video_id"],
18
19
  },
19
20
  assertions: {
20
21
  uniqueKeys: [
21
22
  [
22
- "interaction_date",
23
+ "data_date",
23
24
  "channel_id",
24
25
  "video_id",
25
26
  "live_or_on_demand",
@@ -29,42 +30,22 @@ module.exports = (config) => {
29
30
  "subtitle_language_autotranslated",
30
31
  ],
31
32
  ],
32
- rowConditions: ["interaction_date = source_partition_date"],
33
+ // rowConditions: ["interaction_date = source_partition_date"],
33
34
  },
34
35
  columns: column_descriptions.column_descriptions,
35
36
  description: "YT Channel Subtitles Report Table - Staging",
36
37
  })
37
- .preOps(
38
- (ctx) => `
39
- declare source_date_checkpoint default (
40
- select date("${config.startDate}")
41
- );
42
-
43
- set source_date_checkpoint = (
44
- ${ctx.when(
45
- ctx.incremental(),
46
- `select
47
- least(
48
- (select date_sub(current_date(), interval ${config.daysBack} day)),
49
- (select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
50
- )`,
51
- `select date("${config.startDate}")`,
52
- )}
53
- );
54
-
55
- ${ctx.when(
56
- ctx.incremental(),
57
- `delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
58
- )}
59
- `,
60
- )
38
+ .preOps((ctx) => {
39
+ // Get pre_operations to find updated source partitions
40
+ return `${helpers.staging_preops(ctx, config, "stg_ytc_subtitles")}`;
41
+ })
61
42
  .query((ctx) =>
62
43
  config.sources
63
44
  .map((t) => {
64
45
  return `
65
46
  select
66
47
  _PARTITIONDATE as source_partition_date,
67
- parse_date('%Y%m%d',date) as interaction_date,
48
+ parse_date('%Y%m%d', date) as data_date,
68
49
  "${t.schema}" as site_nm,
69
50
  current_timestamp() as updated_at,
70
51
  channel_id,
@@ -81,7 +62,11 @@ select
81
62
  red_views,
82
63
  red_watch_time_minutes
83
64
  from ${ctx.ref(t.database, t.schema, "p_channel_subtitles_a2_" + t.suffix)}
84
- where _PARTITIONDATE > source_date_checkpoint
65
+ ${ctx.when(
66
+ ctx.incremental(),
67
+ `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
68
+ `where date(_PARTITIONTIME) > date(self_checkpoint)`,
69
+ )}
85
70
  `;
86
71
  })
87
72
  .join(" union all "),
@@ -2,6 +2,8 @@
2
2
  * Copyright (C) 2025 by KEN-E, LLC
3
3
  */
4
4
 
5
+ const helpers = require("../../helpers");
6
+
5
7
  module.exports = (config) => {
6
8
  // eslint-disable-next-line no-undef
7
9
  return publish("stg_ytc_traffic_source", {
@@ -11,14 +13,14 @@ module.exports = (config) => {
11
13
  protected: config.protected,
12
14
  tags: ["youtube", "source", "staging", "daily"],
13
15
  bigquery: {
14
- partitionBy: "interaction_date",
16
+ partitionBy: "data_date",
15
17
  clusterBy: ["video_id", "traffic_source_type"],
16
18
  },
17
19
  assertions: {
18
20
  // make sure rows have unique dimensions
19
21
  uniqueKeys: [
20
22
  [
21
- "interaction_date",
23
+ "data_date",
22
24
  "channel_id",
23
25
  "video_id",
24
26
  "live_or_on_demand",
@@ -29,60 +31,21 @@ module.exports = (config) => {
29
31
  ],
30
32
  ],
31
33
  // make sure source partition and data dates match
32
- rowConditions: ["interaction_date = source_partition_date"],
33
- },
34
- description: "Staging table for YouTube Channel Traffic Source data",
35
- assertions: {
36
- // make sure rows have unique dimensions
37
- uniqueKeys: [
38
- [
39
- "interaction_date",
40
- "channel_id",
41
- "video_id",
42
- "live_or_on_demand",
43
- "subscribed_status",
44
- "country_code",
45
- "traffic_source_type",
46
- "traffic_source_detail",
47
- ],
48
- ],
49
- // make sure source partition and data dates match
50
- rowConditions: ["interaction_date = source_partition_date"],
34
+ // rowConditions: ["interaction_date = source_partition_date"],
51
35
  },
52
36
  description: "Staging table for YouTube Channel Traffic Source data",
53
37
  })
54
- .preOps(
55
- (ctx) => `
56
- declare source_date_checkpoint default (
57
- select date("${config.startDate}")
58
- );
59
-
60
- --Set the incremental update checkpoint based on current max partition value minus lookback.
61
- set source_date_checkpoint = (
62
- ${ctx.when(
63
- ctx.incremental(),
64
- `select
65
- least(
66
- (select date_sub(current_date(), interval ${config.daysBack} day)),
67
- (select date_sub(max(source_partition_date), interval ${config.daysBack} day) from ${ctx.self()})
68
- )`,
69
- `select date("${config.startDate}")`,
70
- )}
71
- );
72
-
73
- ${ctx.when(
74
- ctx.incremental(),
75
- `delete ${ctx.self()} where source_partition_date > source_date_checkpoint`,
76
- )}
77
- `,
78
- )
38
+ .preOps((ctx) => {
39
+ // Get pre_operations to find updated source partitions
40
+ return `${helpers.staging_preops(ctx, config, "stg_ytc_traffic_source")}`;
41
+ })
79
42
  .query((ctx) =>
80
43
  config.sources
81
44
  .map((t) => {
82
45
  return `
83
46
  select
84
47
  _PARTITIONDATE as source_partition_date,
85
- parse_date('%Y%m%d', date) as interaction_date,
48
+ parse_date('%Y%m%d', date) as data_date,
86
49
  "${t.schema}" as site_nm,
87
50
  current_timestamp() as updated_at,
88
51
  channel_id,
@@ -99,7 +62,11 @@ select
99
62
  red_views,
100
63
  red_watch_time_minutes
101
64
  from ${ctx.ref(t.database, t.schema, "p_channel_traffic_source_a2_" + t.suffix)}
102
- where _PARTITIONDATE > source_date_checkpoint
65
+ ${ctx.when(
66
+ ctx.incremental(),
67
+ `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t}"))`,
68
+ `where date(_PARTITIONTIME) > date(self_checkpoint)`,
69
+ )}
103
70
  `;
104
71
  })
105
72
  .join(" union all "),
@@ -3,45 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_annotation", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["video_id", "annotation_type"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Annotation Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- --Set the incremental update checkpoint based on current max partition value minus lookback.
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_annotation"))}`;
24
+ })
45
25
  .query(
46
26
  (ctx) => `
47
27
 
@@ -58,8 +38,13 @@ left join ${ctx.ref(
58
38
  config.titlesTable,
59
39
  )} as titles
60
40
  using (video_id)
61
- where interaction_date > interaction_date_checkpoint
62
-
41
+ ${ctx.when(
42
+ ctx.incremental(),
43
+ `where
44
+ data_date in unnest(partitions_to_update)`,
45
+ `where
46
+ data_date > date '${config.startDate}'`,
47
+ )}
63
48
  `,
64
49
  );
65
50
  };
@@ -3,47 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_basic", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["video_id"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Basic Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- -- Calculate date checkpoint for incremental updates.
23
-
24
- declare interaction_date_checkpoint default (
25
- select date("${config.startDate}")
26
- );
27
-
28
- --Set the incremental update checkpoint based on current max partition value minus lookback.
29
- set interaction_date_checkpoint = (
30
- ${ctx.when(
31
- ctx.incremental(),
32
- `select
33
- least(
34
- (select date_sub(current_date(), interval ${config.daysBack} day)),
35
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
36
- )`,
37
- `select date("${config.startDate}")`,
38
- )}
39
- );
40
-
41
- ${ctx.when(
42
- ctx.incremental(),
43
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
44
- )}
45
- `,
46
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_basic"))}`;
24
+ })
47
25
  .query(
48
26
  (ctx) => `
49
27
 
@@ -54,7 +32,13 @@ with int_ex_titles as (
54
32
  views * safe_divide(average_view_duration_seconds, average_view_duration_percentage) as row_max_duration_seconds,
55
33
  subscribers_gained - subscribers_lost as subscribers_net,
56
34
  from ${ctx.ref("stg_ytc_basic")}
57
- where interaction_date > interaction_date_checkpoint
35
+ ${ctx.when(
36
+ ctx.incremental(),
37
+ `where
38
+ data_date in unnest(partitions_to_update)`,
39
+ `where
40
+ data_date > date '${config.startDate}'`,
41
+ )}
58
42
  )
59
43
 
60
44
  select