@ken-e/dataform-youtube 0.0.2 → 0.0.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (42)
  1. package/includes/column_descriptions.js +137 -95
  2. package/includes/definitions/sources/stg_ytc_annotation.js +16 -31
  3. package/includes/definitions/sources/stg_ytc_basic.js +15 -31
  4. package/includes/definitions/sources/stg_ytc_cards.js +16 -31
  5. package/includes/definitions/sources/stg_ytc_combined.js +15 -31
  6. package/includes/definitions/sources/stg_ytc_demographics.js +16 -31
  7. package/includes/definitions/sources/stg_ytc_device_os.js +16 -31
  8. package/includes/definitions/sources/stg_ytc_end_screens.js +14 -29
  9. package/includes/definitions/sources/stg_ytc_list_basic.js +15 -31
  10. package/includes/definitions/sources/stg_ytc_list_combined.js +16 -30
  11. package/includes/definitions/sources/stg_ytc_list_device_os.js +16 -30
  12. package/includes/definitions/sources/stg_ytc_list_playback.js +16 -30
  13. package/includes/definitions/sources/stg_ytc_list_province.js +15 -31
  14. package/includes/definitions/sources/stg_ytc_list_traffic_source.js +16 -30
  15. package/includes/definitions/sources/stg_ytc_lu_operating_systems.js +1 -1
  16. package/includes/definitions/sources/stg_ytc_playback.js +16 -31
  17. package/includes/definitions/sources/stg_ytc_province.js +15 -31
  18. package/includes/definitions/sources/stg_ytc_share_platform.js +16 -31
  19. package/includes/definitions/sources/stg_ytc_subtitles.js +15 -30
  20. package/includes/definitions/sources/stg_ytc_traffic_source.js +16 -49
  21. package/includes/definitions/ytc_annotation.js +14 -29
  22. package/includes/definitions/ytc_basic.js +14 -30
  23. package/includes/definitions/ytc_cards.js +14 -29
  24. package/includes/definitions/ytc_combined.js +14 -31
  25. package/includes/definitions/ytc_demographics.js +14 -28
  26. package/includes/definitions/ytc_demographics_views.js +15 -29
  27. package/includes/definitions/ytc_device_os.js +14 -28
  28. package/includes/definitions/ytc_end_screens.js +14 -28
  29. package/includes/definitions/ytc_list_basic.js +14 -28
  30. package/includes/definitions/ytc_list_combined.js +14 -27
  31. package/includes/definitions/ytc_list_device_os.js +14 -27
  32. package/includes/definitions/ytc_list_playback.js +14 -27
  33. package/includes/definitions/ytc_list_province.js +14 -28
  34. package/includes/definitions/ytc_list_traffic_source.js +14 -27
  35. package/includes/definitions/ytc_playback.js +14 -28
  36. package/includes/definitions/ytc_province.js +14 -28
  37. package/includes/definitions/ytc_share_platform.js +14 -28
  38. package/includes/definitions/ytc_subtitles.js +14 -27
  39. package/includes/definitions/ytc_traffic_source.js +14 -28
  40. package/includes/helpers.js +12 -0
  41. package/index.js +12 -2
  42. package/package.json +3 -3
@@ -3,44 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_list_combined", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.target.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["playlist_id", "video_id"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Playlist Combined Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- set interaction_date_checkpoint = (
27
- ${ctx.when(
28
- ctx.incremental(),
29
- `select
30
- least(
31
- (select date_sub(current_date(), interval ${config.daysBack} day)),
32
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
33
- )`,
34
- `select date("${config.startDate}")`,
35
- )}
36
- );
37
-
38
- ${ctx.when(
39
- ctx.incremental(),
40
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
41
- )}
42
- `,
43
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_list_combined"))}`;
24
+ })
44
25
  .query(
45
26
  (ctx) => `
46
27
 
@@ -65,7 +46,13 @@ with int_with_lookup_names as (
65
46
  using (playback_location_type)
66
47
  left join ${ctx.ref("stg_ytc_lu_traffic_sources")} as ts
67
48
  using (traffic_source_type)
68
- where interaction_date > interaction_date_checkpoint
49
+ ${ctx.when(
50
+ ctx.incremental(),
51
+ `where
52
+ data_date in unnest(partitions_to_update)`,
53
+ `where
54
+ data_date > date '${config.startDate}'`,
55
+ )}
69
56
  ),
70
57
 
71
58
  int_with_video_titles as (
@@ -3,45 +3,26 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_list_device_os", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.target.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["playlist_id", "video_id", "device_type", "operating_system"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description:
19
20
  "YT Channel Playlist Device and OS Report Table - Intermediate",
20
21
  })
21
- .preOps(
22
- (ctx) => `
23
- declare interaction_date_checkpoint default (
24
- select date("${config.startDate}")
25
- );
26
-
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
22
+ .preOps((ctx) => {
23
+ // Get pre_operations to find updated output partitions
24
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_list_device_os"))}`;
25
+ })
45
26
  .query(
46
27
  (ctx) => `
47
28
 
@@ -58,7 +39,13 @@ with int_with_device_os_names as (
58
39
  using (device_type)
59
40
  left join ${ctx.ref("stg_ytc_lu_operating_systems")} as os
60
41
  using (operating_system)
61
- where interaction_date > interaction_date_checkpoint
42
+ ${ctx.when(
43
+ ctx.incremental(),
44
+ `where
45
+ data_date in unnest(partitions_to_update)`,
46
+ `where
47
+ data_date > date '${config.startDate}'`,
48
+ )}
62
49
  ),
63
50
 
64
51
  int_with_video_titles as (
@@ -3,45 +3,26 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_list_playback", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.target.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["playlist_id", "video_id", "playback_location_type"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description:
19
20
  "YT Channel Playlist Playback Location Report Table - Intermediate",
20
21
  })
21
- .preOps(
22
- (ctx) => `
23
- declare interaction_date_checkpoint default (
24
- select date("${config.startDate}")
25
- );
26
-
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
22
+ .preOps((ctx) => {
23
+ // Get pre_operations to find updated output partitions
24
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_list_playback"))}`;
25
+ })
45
26
  .query(
46
27
  (ctx) => `
47
28
 
@@ -54,7 +35,13 @@ with int_with_playback_names as (
54
35
  from ${ctx.ref("stg_ytc_list_playback")} as base
55
36
  left join ${ctx.ref("stg_ytc_lu_playback_location")} as pl
56
37
  using (playback_location_type)
57
- where interaction_date > interaction_date_checkpoint
38
+ ${ctx.when(
39
+ ctx.incremental(),
40
+ `where
41
+ data_date in unnest(partitions_to_update)`,
42
+ `where
43
+ data_date > date '${config.startDate}'`,
44
+ )}
58
45
  ),
59
46
 
60
47
  int_with_video_titles as (
@@ -3,45 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_list_province", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.target.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["playlist_id", "video_id"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Playlist Province Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- --Set the incremental update checkpoint based on current max partition value minus lookback.
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_list_province"))}`;
24
+ })
45
25
  .query(
46
26
  (ctx) => `
47
27
 
@@ -51,7 +31,13 @@ with int_ex_titles as (
51
31
  * except (source_partition_date),
52
32
  playlist_saves_added - playlist_saves_removed as playlist_saves_net
53
33
  from ${ctx.ref("stg_ytc_list_province")}
54
- where interaction_date > interaction_date_checkpoint
34
+ ${ctx.when(
35
+ ctx.incremental(),
36
+ `where
37
+ data_date in unnest(partitions_to_update)`,
38
+ `where
39
+ data_date > date '${config.startDate}'`,
40
+ )}
55
41
  ),
56
42
 
57
43
  int_with_video_titles as (
@@ -3,45 +3,26 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_list_traffic_source", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.target.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["playlist_id", "video_id", "traffic_source_type"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description:
19
20
  "YT Channel Playlist Traffic Source Report Table - Intermediate",
20
21
  })
21
- .preOps(
22
- (ctx) => `
23
- declare interaction_date_checkpoint default (
24
- select date("${config.startDate}")
25
- );
26
-
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
22
+ .preOps((ctx) => {
23
+ // Get pre_operations to find updated output partitions
24
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_list_traffic_source"))}`;
25
+ })
45
26
  .query(
46
27
  (ctx) => `
47
28
 
@@ -50,7 +31,13 @@ with int_ex_titles as (
50
31
  * except (source_partition_date),
51
32
  playlist_saves_added - playlist_saves_removed as playlist_saves_net
52
33
  from ${ctx.ref("stg_ytc_list_traffic_source")}
53
- where interaction_date > interaction_date_checkpoint
34
+ ${ctx.when(
35
+ ctx.incremental(),
36
+ `where
37
+ data_date in unnest(partitions_to_update)`,
38
+ `where
39
+ data_date > date '${config.startDate}'`,
40
+ )}
54
41
  ),
55
42
 
56
43
  int_with_traffic_source as (
@@ -3,45 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_playback", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.target.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["video_id", "playback_location_type"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Playback Location Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- --Set the incremental update checkpoint based on current max partition value minus lookback.
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_playback"))}`;
24
+ })
45
25
  .query(
46
26
  (ctx) => `
47
27
 
@@ -51,7 +31,13 @@ with int_ex_titles as (
51
31
  * except (source_partition_date),
52
32
  views * safe_divide(average_view_duration_seconds, average_view_duration_percentage) as row_max_duration_seconds
53
33
  from ${ctx.ref("stg_ytc_playback")}
54
- where interaction_date > interaction_date_checkpoint
34
+ ${ctx.when(
35
+ ctx.incremental(),
36
+ `where
37
+ data_date in unnest(partitions_to_update)`,
38
+ `where
39
+ data_date > date '${config.startDate}'`,
40
+ )}
55
41
  )
56
42
 
57
43
  select
@@ -3,45 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_province", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.target.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["video_id"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Province Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- --Set the incremental update checkpoint based on current max partition value minus lookback.
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_province"))}`;
24
+ })
45
25
  .query(
46
26
  (ctx) => `
47
27
 
@@ -51,7 +31,13 @@ with int_ex_titles as (
51
31
  * except (source_partition_date),
52
32
  views * safe_divide(average_view_duration_seconds, average_view_duration_percentage) as row_max_duration_seconds
53
33
  from ${ctx.ref("stg_ytc_province")}
54
- where interaction_date > interaction_date_checkpoint
34
+ ${ctx.when(
35
+ ctx.incremental(),
36
+ `where
37
+ data_date in unnest(partitions_to_update)`,
38
+ `where
39
+ data_date > date '${config.startDate}'`,
40
+ )}
55
41
  )
56
42
 
57
43
  select
@@ -7,45 +7,25 @@
7
7
  */
8
8
 
9
9
  const column_descriptions = require("../column_descriptions");
10
+ const helpers = require("../helpers");
10
11
 
11
12
  module.exports = (config) => {
12
13
  // eslint-disable-next-line no-undef
13
14
  return publish("ytc_share_platform", {
14
15
  type: "incremental",
15
- schema: config.datasetIntermediate,
16
+ schema: config.target.outputSchema,
16
17
  tags: ["youtube", "output", "daily"],
17
18
  bigquery: {
18
- partitionBy: "interaction_date",
19
+ partitionBy: "data_date",
19
20
  clusterBy: ["video_id", "sharing_service"],
20
21
  },
21
22
  columns: column_descriptions.column_descriptions,
22
23
  description: "YT Channel Share Platform Report Table - Intermediate",
23
24
  })
24
- .preOps(
25
- (ctx) => `
26
- declare interaction_date_checkpoint default (
27
- select date("${config.startDate}")
28
- );
29
-
30
- --Set the incremental update checkpoint based on current max partition value minus lookback.
31
- set interaction_date_checkpoint = (
32
- ${ctx.when(
33
- ctx.incremental(),
34
- `select
35
- least(
36
- (select date_sub(current_date(), interval ${config.daysBack} day)),
37
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
38
- )`,
39
- `select date("${config.startDate}")`,
40
- )}
41
- );
42
-
43
- ${ctx.when(
44
- ctx.incremental(),
45
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
46
- )}
47
- `,
48
- )
25
+ .preOps((ctx) => {
26
+ // Get pre_operations to find updated output partitions
27
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_share_platform"))}`;
28
+ })
49
29
  .query(
50
30
  (ctx) => `
51
31
 
@@ -54,7 +34,13 @@ with int_ex_titles as (
54
34
  select
55
35
  * except (source_partition_date)
56
36
  from ${ctx.ref("stg_ytc_share_platform")}
57
- where interaction_date > interaction_date_checkpoint
37
+ ${ctx.when(
38
+ ctx.incremental(),
39
+ `where
40
+ data_date in unnest(partitions_to_update)`,
41
+ `where
42
+ data_date > date '${config.startDate}'`,
43
+ )}
58
44
  )
59
45
 
60
46
  select
@@ -3,44 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_subtitles", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.target.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["video_id"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Subtitles Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- set interaction_date_checkpoint = (
27
- ${ctx.when(
28
- ctx.incremental(),
29
- `select
30
- least(
31
- (select date_sub(current_date(), interval ${config.daysBack} day)),
32
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
33
- )`,
34
- `select date("${config.startDate}")`,
35
- )}
36
- );
37
-
38
- ${ctx.when(
39
- ctx.incremental(),
40
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
41
- )}
42
- `,
43
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_subtitles"))}`;
24
+ })
44
25
  .query(
45
26
  (ctx) => `
46
27
 
@@ -49,7 +30,13 @@ with int_ex_titles as (
49
30
  * except (source_partition_date),
50
31
  views * safe_divide(average_view_duration_seconds, average_view_duration_percentage) as row_max_duration_seconds
51
32
  from ${ctx.ref("stg_ytc_subtitles")}
52
- where interaction_date > interaction_date_checkpoint
33
+ ${ctx.when(
34
+ ctx.incremental(),
35
+ `where
36
+ data_date in unnest(partitions_to_update)`,
37
+ `where
38
+ data_date > date '${config.startDate}'`,
39
+ )}
53
40
  )
54
41
 
55
42
  select
@@ -3,45 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_traffic_source", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.target.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["video_id", "traffic_source_type"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Traffic Source Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- --Set the incremental update checkpoint based on current max partition value minus lookback.
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_traffic_source"))}`;
24
+ })
45
25
  .query(
46
26
  (ctx) => `
47
27
 
@@ -51,7 +31,13 @@ with int_ex_titles as (
51
31
  * except (source_partition_date),
52
32
  views * safe_divide(average_view_duration_seconds, average_view_duration_percentage) as row_max_duration_seconds
53
33
  from ${ctx.ref("stg_ytc_traffic_source")}
54
- where interaction_date > interaction_date_checkpoint
34
+ ${ctx.when(
35
+ ctx.incremental(),
36
+ `where
37
+ data_date in unnest(partitions_to_update)`,
38
+ `where
39
+ data_date > date '${config.startDate}'`,
40
+ )}
55
41
  )
56
42
 
57
43
  select