@ken-e/dataform-youtube 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/includes/column_descriptions.js +137 -95
  2. package/includes/definitions/sources/stg_ytc_annotation.js +15 -30
  3. package/includes/definitions/sources/stg_ytc_basic.js +14 -30
  4. package/includes/definitions/sources/stg_ytc_cards.js +15 -30
  5. package/includes/definitions/sources/stg_ytc_combined.js +14 -30
  6. package/includes/definitions/sources/stg_ytc_demographics.js +15 -30
  7. package/includes/definitions/sources/stg_ytc_device_os.js +15 -30
  8. package/includes/definitions/sources/stg_ytc_end_screens.js +13 -28
  9. package/includes/definitions/sources/stg_ytc_list_basic.js +14 -30
  10. package/includes/definitions/sources/stg_ytc_list_combined.js +15 -29
  11. package/includes/definitions/sources/stg_ytc_list_device_os.js +15 -29
  12. package/includes/definitions/sources/stg_ytc_list_playback.js +15 -29
  13. package/includes/definitions/sources/stg_ytc_list_province.js +14 -30
  14. package/includes/definitions/sources/stg_ytc_list_traffic_source.js +15 -29
  15. package/includes/definitions/sources/stg_ytc_lu_operating_systems.js +1 -1
  16. package/includes/definitions/sources/stg_ytc_playback.js +15 -30
  17. package/includes/definitions/sources/stg_ytc_province.js +14 -30
  18. package/includes/definitions/sources/stg_ytc_share_platform.js +15 -30
  19. package/includes/definitions/sources/stg_ytc_subtitles.js +14 -29
  20. package/includes/definitions/sources/stg_ytc_traffic_source.js +15 -48
  21. package/includes/definitions/ytc_annotation.js +14 -29
  22. package/includes/definitions/ytc_basic.js +14 -30
  23. package/includes/definitions/ytc_cards.js +14 -29
  24. package/includes/definitions/ytc_combined.js +14 -31
  25. package/includes/definitions/ytc_demographics.js +14 -28
  26. package/includes/definitions/ytc_demographics_views.js +15 -29
  27. package/includes/definitions/ytc_device_os.js +14 -28
  28. package/includes/definitions/ytc_end_screens.js +14 -28
  29. package/includes/definitions/ytc_list_basic.js +14 -28
  30. package/includes/definitions/ytc_list_combined.js +14 -27
  31. package/includes/definitions/ytc_list_device_os.js +14 -27
  32. package/includes/definitions/ytc_list_playback.js +14 -27
  33. package/includes/definitions/ytc_list_province.js +14 -28
  34. package/includes/definitions/ytc_list_traffic_source.js +14 -27
  35. package/includes/definitions/ytc_playback.js +14 -28
  36. package/includes/definitions/ytc_province.js +14 -28
  37. package/includes/definitions/ytc_share_platform.js +14 -28
  38. package/includes/definitions/ytc_subtitles.js +14 -27
  39. package/includes/definitions/ytc_traffic_source.js +14 -28
  40. package/includes/helpers.js +12 -0
  41. package/index.js +12 -2
  42. package/package.json +3 -3
@@ -3,45 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_list_province", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["playlist_id", "video_id"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Playlist Province Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- --Set the incremental update checkpoint based on current max partition value minus lookback.
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_list_province"))}`;
24
+ })
45
25
  .query(
46
26
  (ctx) => `
47
27
 
@@ -51,7 +31,13 @@ with int_ex_titles as (
51
31
  * except (source_partition_date),
52
32
  playlist_saves_added - playlist_saves_removed as playlist_saves_net
53
33
  from ${ctx.ref("stg_ytc_list_province")}
54
- where interaction_date > interaction_date_checkpoint
34
+ ${ctx.when(
35
+ ctx.incremental(),
36
+ `where
37
+ data_date in unnest(partitions_to_update)`,
38
+ `where
39
+ data_date > date '${config.startDate}'`,
40
+ )}
55
41
  ),
56
42
 
57
43
  int_with_video_titles as (
@@ -3,45 +3,26 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_list_traffic_source", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["playlist_id", "video_id", "traffic_source_type"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description:
19
20
  "YT Channel Playlist Traffic Source Report Table - Intermediate",
20
21
  })
21
- .preOps(
22
- (ctx) => `
23
- declare interaction_date_checkpoint default (
24
- select date("${config.startDate}")
25
- );
26
-
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
22
+ .preOps((ctx) => {
23
+ // Get pre_operations to find updated output partitions
24
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_list_traffic_source"))}`;
25
+ })
45
26
  .query(
46
27
  (ctx) => `
47
28
 
@@ -50,7 +31,13 @@ with int_ex_titles as (
50
31
  * except (source_partition_date),
51
32
  playlist_saves_added - playlist_saves_removed as playlist_saves_net
52
33
  from ${ctx.ref("stg_ytc_list_traffic_source")}
53
- where interaction_date > interaction_date_checkpoint
34
+ ${ctx.when(
35
+ ctx.incremental(),
36
+ `where
37
+ data_date in unnest(partitions_to_update)`,
38
+ `where
39
+ data_date > date '${config.startDate}'`,
40
+ )}
54
41
  ),
55
42
 
56
43
  int_with_traffic_source as (
@@ -3,45 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_playback", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["video_id", "playback_location_type"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Playback Location Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- --Set the incremental update checkpoint based on current max partition value minus lookback.
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_playback"))}`;
24
+ })
45
25
  .query(
46
26
  (ctx) => `
47
27
 
@@ -51,7 +31,13 @@ with int_ex_titles as (
51
31
  * except (source_partition_date),
52
32
  views * safe_divide(average_view_duration_seconds, average_view_duration_percentage) as row_max_duration_seconds
53
33
  from ${ctx.ref("stg_ytc_playback")}
54
- where interaction_date > interaction_date_checkpoint
34
+ ${ctx.when(
35
+ ctx.incremental(),
36
+ `where
37
+ data_date in unnest(partitions_to_update)`,
38
+ `where
39
+ data_date > date '${config.startDate}'`,
40
+ )}
55
41
  )
56
42
 
57
43
  select
@@ -3,45 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_province", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["video_id"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Province Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- --Set the incremental update checkpoint based on current max partition value minus lookback.
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_province"))}`;
24
+ })
45
25
  .query(
46
26
  (ctx) => `
47
27
 
@@ -51,7 +31,13 @@ with int_ex_titles as (
51
31
  * except (source_partition_date),
52
32
  views * safe_divide(average_view_duration_seconds, average_view_duration_percentage) as row_max_duration_seconds
53
33
  from ${ctx.ref("stg_ytc_province")}
54
- where interaction_date > interaction_date_checkpoint
34
+ ${ctx.when(
35
+ ctx.incremental(),
36
+ `where
37
+ data_date in unnest(partitions_to_update)`,
38
+ `where
39
+ data_date > date '${config.startDate}'`,
40
+ )}
55
41
  )
56
42
 
57
43
  select
@@ -7,45 +7,25 @@
7
7
  */
8
8
 
9
9
  const column_descriptions = require("../column_descriptions");
10
+ const helpers = require("../helpers");
10
11
 
11
12
  module.exports = (config) => {
12
13
  // eslint-disable-next-line no-undef
13
14
  return publish("ytc_share_platform", {
14
15
  type: "incremental",
15
- schema: config.datasetIntermediate,
16
+ schema: config.outputSchema,
16
17
  tags: ["youtube", "output", "daily"],
17
18
  bigquery: {
18
- partitionBy: "interaction_date",
19
+ partitionBy: "data_date",
19
20
  clusterBy: ["video_id", "sharing_service"],
20
21
  },
21
22
  columns: column_descriptions.column_descriptions,
22
23
  description: "YT Channel Share Platform Report Table - Intermediate",
23
24
  })
24
- .preOps(
25
- (ctx) => `
26
- declare interaction_date_checkpoint default (
27
- select date("${config.startDate}")
28
- );
29
-
30
- --Set the incremental update checkpoint based on current max partition value minus lookback.
31
- set interaction_date_checkpoint = (
32
- ${ctx.when(
33
- ctx.incremental(),
34
- `select
35
- least(
36
- (select date_sub(current_date(), interval ${config.daysBack} day)),
37
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
38
- )`,
39
- `select date("${config.startDate}")`,
40
- )}
41
- );
42
-
43
- ${ctx.when(
44
- ctx.incremental(),
45
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
46
- )}
47
- `,
48
- )
25
+ .preOps((ctx) => {
26
+ // Get pre_operations to find updated output partitions
27
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_share_platform"))}`;
28
+ })
49
29
  .query(
50
30
  (ctx) => `
51
31
 
@@ -54,7 +34,13 @@ with int_ex_titles as (
54
34
  select
55
35
  * except (source_partition_date)
56
36
  from ${ctx.ref("stg_ytc_share_platform")}
57
- where interaction_date > interaction_date_checkpoint
37
+ ${ctx.when(
38
+ ctx.incremental(),
39
+ `where
40
+ data_date in unnest(partitions_to_update)`,
41
+ `where
42
+ data_date > date '${config.startDate}'`,
43
+ )}
58
44
  )
59
45
 
60
46
  select
@@ -3,44 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_subtitles", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["video_id"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Subtitles Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- set interaction_date_checkpoint = (
27
- ${ctx.when(
28
- ctx.incremental(),
29
- `select
30
- least(
31
- (select date_sub(current_date(), interval ${config.daysBack} day)),
32
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
33
- )`,
34
- `select date("${config.startDate}")`,
35
- )}
36
- );
37
-
38
- ${ctx.when(
39
- ctx.incremental(),
40
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
41
- )}
42
- `,
43
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_subtitles"))}`;
24
+ })
44
25
  .query(
45
26
  (ctx) => `
46
27
 
@@ -49,7 +30,13 @@ with int_ex_titles as (
49
30
  * except (source_partition_date),
50
31
  views * safe_divide(average_view_duration_seconds, average_view_duration_percentage) as row_max_duration_seconds
51
32
  from ${ctx.ref("stg_ytc_subtitles")}
52
- where interaction_date > interaction_date_checkpoint
33
+ ${ctx.when(
34
+ ctx.incremental(),
35
+ `where
36
+ data_date in unnest(partitions_to_update)`,
37
+ `where
38
+ data_date > date '${config.startDate}'`,
39
+ )}
53
40
  )
54
41
 
55
42
  select
@@ -3,45 +3,25 @@
3
3
  */
4
4
 
5
5
  const column_descriptions = require("../column_descriptions");
6
+ const helpers = require("../helpers");
6
7
 
7
8
  module.exports = (config) => {
8
9
  // eslint-disable-next-line no-undef
9
10
  return publish("ytc_traffic_source", {
10
11
  type: "incremental",
11
- schema: config.datasetIntermediate,
12
+ schema: config.outputSchema,
12
13
  tags: ["youtube", "output", "daily"],
13
14
  bigquery: {
14
- partitionBy: "interaction_date",
15
+ partitionBy: "data_date",
15
16
  clusterBy: ["video_id", "traffic_source_type"],
16
17
  },
17
18
  columns: column_descriptions.column_descriptions,
18
19
  description: "YT Channel Traffic Source Report Table - Intermediate",
19
20
  })
20
- .preOps(
21
- (ctx) => `
22
- declare interaction_date_checkpoint default (
23
- select date("${config.startDate}")
24
- );
25
-
26
- --Set the incremental update checkpoint based on current max partition value minus lookback.
27
- set interaction_date_checkpoint = (
28
- ${ctx.when(
29
- ctx.incremental(),
30
- `select
31
- least(
32
- (select date_sub(current_date(), interval ${config.daysBack} day)),
33
- (select date_sub(max(interaction_date), interval ${config.daysBack} day) from ${ctx.self()})
34
- )`,
35
- `select date("${config.startDate}")`,
36
- )}
37
- );
38
-
39
- ${ctx.when(
40
- ctx.incremental(),
41
- `delete ${ctx.self()} where interaction_date > interaction_date_checkpoint`,
42
- )}
43
- `,
44
- )
21
+ .preOps((ctx) => {
22
+ // Get pre_operations to find updated output partitions
23
+ return `${helpers.output_preops(ctx, config, ctx.ref("stg_ytc_traffic_source"))}`;
24
+ })
45
25
  .query(
46
26
  (ctx) => `
47
27
 
@@ -51,7 +31,13 @@ with int_ex_titles as (
51
31
  * except (source_partition_date),
52
32
  views * safe_divide(average_view_duration_seconds, average_view_duration_percentage) as row_max_duration_seconds
53
33
  from ${ctx.ref("stg_ytc_traffic_source")}
54
- where interaction_date > interaction_date_checkpoint
34
+ ${ctx.when(
35
+ ctx.incremental(),
36
+ `where
37
+ data_date in unnest(partitions_to_update)`,
38
+ `where
39
+ data_date > date '${config.startDate}'`,
40
+ )}
55
41
  )
56
42
 
57
43
  select
@@ -3,6 +3,18 @@
3
3
  * Javascript functions that are referenced throughout the models.
4
4
  */
5
5
 
6
+ const { incremental } = require("@ken-e/dataform-helpers");
7
+
8
+ const {
9
+ BQ_DATA_TRANSFER_PARTITION_STAGING_PREOPS,
10
+ BQ_DATA_TRANSFER_PARTITION_OUTPUT_PREOPS,
11
+ } = incremental;
12
+
13
+ module.exports = {
14
+ staging_preops: BQ_DATA_TRANSFER_PARTITION_STAGING_PREOPS,
15
+ output_preops: BQ_DATA_TRANSFER_PARTITION_OUTPUT_PREOPS,
16
+ };
17
+
6
18
  function addTableSuffix(table) {
7
19
  return table + config.sourceTableSuffix;
8
20
  }
package/index.js CHANGED
@@ -77,6 +77,16 @@ module.exports = (config) => {
77
77
  throw new Error("Missing target database");
78
78
  }
79
79
 
80
+ if (
81
+ !config.target.schema &&
82
+ !config.target.stagingSchema &&
83
+ !config.target.outputSchema
84
+ ) {
85
+ throw new Error(
86
+ "Missing output schema. Supply config.target.schema or individual config.target.stagingSchema and config.target.outputSchema",
87
+ );
88
+ }
89
+
80
90
  // TODO: add defaults for your config options
81
91
  config = {
82
92
  sourceTableSuffix: "_drpg",
@@ -90,8 +100,8 @@ module.exports = (config) => {
90
100
  };
91
101
 
92
102
  config.target = {
93
- stagingSchema: "propeller_dataform_template",
94
- outputSchema: "propeller_dataform_template",
103
+ stagingSchema: config.target.stagingSchema || config.target.schema,
104
+ outputSchema: config.target.outputSchema || config.target.schema,
95
105
  protected: false,
96
106
  ...config.target,
97
107
  };
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@ken-e/dataform-youtube",
3
3
  "dependencies": {
4
4
  "@dataform/core": "3.0.9",
5
- "@ken-e/dataform-helpers": "latest"
5
+ "@ken-e/dataform-helpers": "0.0.6"
6
6
  },
7
7
  "devDependencies": {
8
8
  "@eslint/js": "^9.18.0",
@@ -11,8 +11,8 @@
11
11
  "globals": "^15.14.0",
12
12
  "prettier": "3.4.2"
13
13
  },
14
- "version": "0.0.2",
15
- "description": "TODO",
14
+ "version": "0.0.3",
15
+ "description": "A dataform package to process Youtube data from the BQ Data Transfer.",
16
16
  "main": "index.js",
17
17
  "scripts": {
18
18
  "test": "echo \"Error: no test specified\" && exit 1"