@ken-e/dataform-youtube 0.0.9 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,23 +39,23 @@ module.exports = (config) => {
39
39
  })
40
40
  .preOps((ctx) => {
41
41
  // Get pre_operations to find updated source partitions
42
- return `${helpers.staging_preops(ctx, config, "p_channel_basic_a2_")}`;
42
+ return `${helpers.staging_preops(ctx, config, ["p_channel_basic_a2_", "p_channel_basic_a3_"])}`;
43
43
  })
44
44
  .query((ctx) =>
45
45
  config.sources
46
- .map((t) => {
47
- return `
48
- select
46
+ .flatMap((t) => {
47
+ const columns = `
49
48
  _PARTITIONDATE as source_partition_date,
50
49
  parse_date('%Y%m%d', date) as data_date,
51
50
  "${t.schema}" as site_nm,
52
- current_timestamp() as updated_at,
51
+ current_timestamp() as updated_at,
53
52
  channel_id,
54
53
  video_id,
55
54
  live_or_on_demand,
56
55
  subscribed_status,
57
56
  country_code,
58
- views,
57
+ views,`;
58
+ const restColumns = `
59
59
  comments,
60
60
  likes,
61
61
  dislikes,
@@ -81,14 +81,32 @@ select
81
81
  videos_added_to_playlists,
82
82
  videos_removed_from_playlists,
83
83
  red_views,
84
- red_watch_time_minutes,
85
- from ${ctx.ref(t.database, t.schema, "p_channel_basic_a2_" + t.suffix)}
84
+ red_watch_time_minutes,`;
85
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
86
+ return [
87
+ `
88
+ -- Deprecated a2 table (data before cutover)
89
+ select ${columns}
90
+ cast(null as int64) as engaged_views,${restColumns}
91
+ from ${ctx.ref(t.database, t.schema, "p_channel_basic_a2_" + t.suffix)}
86
92
  ${ctx.when(
87
93
  ctx.incremental(),
88
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
89
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
94
+ incrementalWhere,
95
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
90
96
  )}
91
- `;
97
+ `,
98
+ `
99
+ -- New a3 table (data from cutover onward)
100
+ select ${columns}
101
+ engaged_views,${restColumns}
102
+ from ${ctx.ref(t.database, t.schema, "p_channel_basic_a3_" + t.suffix)}
103
+ ${ctx.when(
104
+ ctx.incremental(),
105
+ incrementalWhere,
106
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
107
+ )}
108
+ `,
109
+ ];
92
110
  })
93
111
  .join(" union all "),
94
112
  );
@@ -43,40 +43,57 @@ module.exports = (config) => {
43
43
  })
44
44
  .preOps((ctx) => {
45
45
  // Get pre_operations to find updated source partitions
46
- return `${helpers.staging_preops(ctx, config, "p_channel_combined_a2_")}`;
46
+ return `${helpers.staging_preops(ctx, config, ["p_channel_combined_a2_", "p_channel_combined_a3_"])}`;
47
47
  })
48
48
  .query((ctx) =>
49
49
  config.sources
50
- .map((t) => {
51
- return `
52
-
53
- select
50
+ .flatMap((t) => {
51
+ const columns = `
54
52
  _PARTITIONDATE as source_partition_date,
55
53
  parse_date('%Y%m%d', date) as data_date,
56
54
  "${t.schema}" as site_nm,
57
- current_timestamp() as updated_at,
55
+ current_timestamp() as updated_at,
58
56
  channel_id,
59
57
  video_id,
60
58
  live_or_on_demand,
61
59
  subscribed_status,
62
60
  country_code,
63
- playback_location_type,
64
- traffic_source_type,
65
- device_type,
61
+ playback_location_type,
62
+ traffic_source_type,
63
+ device_type,
66
64
  operating_system,
67
- views,
65
+ views,`;
66
+ const restColumns = `
68
67
  watch_time_minutes,
69
68
  average_view_duration_seconds,
70
69
  average_view_duration_percentage,
71
70
  red_views,
72
- red_watch_time_minutes,
73
- from ${ctx.ref(t.database, t.schema, "p_channel_combined_a2_" + t.suffix)}
71
+ red_watch_time_minutes,`;
72
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
73
+ return [
74
+ `
75
+ -- Deprecated a2 table (data before cutover)
76
+ select ${columns}
77
+ cast(null as int64) as engaged_views,${restColumns}
78
+ from ${ctx.ref(t.database, t.schema, "p_channel_combined_a2_" + t.suffix)}
79
+ ${ctx.when(
80
+ ctx.incremental(),
81
+ incrementalWhere,
82
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
83
+ )}
84
+ `,
85
+ `
86
+ -- New a3 table (data from cutover onward)
87
+ select ${columns}
88
+ engaged_views,${restColumns}
89
+ from ${ctx.ref(t.database, t.schema, "p_channel_combined_a3_" + t.suffix)}
74
90
  ${ctx.when(
75
91
  ctx.incremental(),
76
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
77
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
92
+ incrementalWhere,
93
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
78
94
  )}
79
- `;
95
+ `,
96
+ ];
80
97
  })
81
98
  .join(" union all "),
82
99
  );
@@ -39,14 +39,12 @@ module.exports = (config) => {
39
39
  })
40
40
  .preOps((ctx) => {
41
41
  // Get pre_operations to find updated source partitions
42
- return `${helpers.staging_preops(ctx, config, "p_channel_device_os_a2_")}`;
42
+ return `${helpers.staging_preops(ctx, config, ["p_channel_device_os_a2_", "p_channel_device_os_a3_"])}`;
43
43
  })
44
44
  .query((ctx) =>
45
45
  config.sources
46
- .map((t) => {
47
- return `
48
-
49
- select
46
+ .flatMap((t) => {
47
+ const columns = `
50
48
  _PARTITIONDATE as source_partition_date,
51
49
  parse_date('%Y%m%d', date) as data_date,
52
50
  "${t.schema}" as site_nm,
@@ -58,19 +56,38 @@ select
58
56
  country_code,
59
57
  device_type,
60
58
  operating_system,
61
- views,
59
+ views,`;
60
+ const restColumns = `
62
61
  watch_time_minutes,
63
62
  average_view_duration_seconds,
64
63
  average_view_duration_percentage,
65
64
  red_views,
66
- red_watch_time_minutes
65
+ red_watch_time_minutes`;
66
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
67
+ return [
68
+ `
69
+ -- Deprecated a2 table (data before cutover)
70
+ select ${columns}
71
+ cast(null as int64) as engaged_views,${restColumns}
67
72
  from ${ctx.ref(t.database, t.schema, "p_channel_device_os_a2_" + t.suffix)}
68
73
  ${ctx.when(
69
74
  ctx.incremental(),
70
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
71
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
75
+ incrementalWhere,
76
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
77
+ )}
78
+ `,
79
+ `
80
+ -- New a3 table (data from cutover onward)
81
+ select ${columns}
82
+ engaged_views,${restColumns}
83
+ from ${ctx.ref(t.database, t.schema, "p_channel_device_os_a3_" + t.suffix)}
84
+ ${ctx.when(
85
+ ctx.incremental(),
86
+ incrementalWhere,
87
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
72
88
  )}
73
- `;
89
+ `,
90
+ ];
74
91
  })
75
92
  .join(" union all "),
76
93
  );
@@ -40,37 +40,54 @@ module.exports = (config) => {
40
40
  })
41
41
  .preOps((ctx) => {
42
42
  // Get pre_operations to find updated source partitions
43
- return `${helpers.staging_preops(ctx, config, "p_playlist_basic_a1_")}`;
43
+ return `${helpers.staging_preops(ctx, config, ["p_playlist_basic_a1_", "p_playlist_basic_a2_"])}`;
44
44
  })
45
45
  .query((ctx) =>
46
46
  config.sources
47
- .map((t) => {
48
- return `
49
-
50
- select
47
+ .flatMap((t) => {
48
+ const columns = `
51
49
  _PARTITIONDATE as source_partition_date,
52
50
  parse_date('%Y%m%d', date) as data_date,
53
51
  "${t.schema}" as site_nm,
54
- current_timestamp() as updated_at,
52
+ current_timestamp() as updated_at,
55
53
  channel_id,
56
54
  playlist_id,
57
55
  video_id,
58
56
  live_or_on_demand,
59
57
  subscribed_status,
60
58
  country_code,
61
- views,
59
+ views,`;
60
+ const restColumns = `
62
61
  watch_time_minutes,
63
62
  average_view_duration_seconds,
64
63
  playlist_starts,
65
64
  playlist_saves_added,
66
- playlist_saves_removed
67
- from ${ctx.ref(t.database, t.schema, "p_playlist_basic_a1_" + t.suffix)}
65
+ playlist_saves_removed`;
66
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
67
+ return [
68
+ `
69
+ -- Deprecated a1 table (data before cutover)
70
+ select ${columns}
71
+ cast(null as int64) as engaged_views,${restColumns}
72
+ from ${ctx.ref(t.database, t.schema, "p_playlist_basic_a1_" + t.suffix)}
73
+ ${ctx.when(
74
+ ctx.incremental(),
75
+ incrementalWhere,
76
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
77
+ )}
78
+ `,
79
+ `
80
+ -- New a2 table (data from cutover onward)
81
+ select ${columns}
82
+ engaged_views,${restColumns}
83
+ from ${ctx.ref(t.database, t.schema, "p_playlist_basic_a2_" + t.suffix)}
68
84
  ${ctx.when(
69
85
  ctx.incremental(),
70
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
71
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
86
+ incrementalWhere,
87
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
72
88
  )}
73
- `;
89
+ `,
90
+ ];
74
91
  })
75
92
  .join(" union all "),
76
93
  );
@@ -40,13 +40,12 @@ module.exports = (config) => {
40
40
  })
41
41
  .preOps((ctx) => {
42
42
  // Get pre_operations to find updated source partitions
43
- return `${helpers.staging_preops(ctx, config, "p_playlist_combined_a1_")}`;
43
+ return `${helpers.staging_preops(ctx, config, ["p_playlist_combined_a1_", "p_playlist_combined_a2_"])}`;
44
44
  })
45
45
  .query((ctx) =>
46
46
  config.sources
47
- .map((t) => {
48
- return `
49
- select
47
+ .flatMap((t) => {
48
+ const columns = `
50
49
  _PARTITIONDATE as source_partition_date,
51
50
  parse_date('%Y%m%d', date) as data_date,
52
51
  "${t.schema}" as site_nm,
@@ -61,19 +60,38 @@ select
61
60
  traffic_source_type,
62
61
  device_type,
63
62
  operating_system,
64
- views,
63
+ views,`;
64
+ const restColumns = `
65
65
  watch_time_minutes,
66
66
  average_view_duration_seconds,
67
67
  playlist_starts,
68
68
  playlist_saves_added,
69
- playlist_saves_removed
69
+ playlist_saves_removed`;
70
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
71
+ return [
72
+ `
73
+ -- Deprecated a1 table (data before cutover)
74
+ select ${columns}
75
+ cast(null as int64) as engaged_views,${restColumns}
70
76
  from ${ctx.ref(t.database, t.schema, "p_playlist_combined_a1_" + t.suffix)}
71
77
  ${ctx.when(
72
78
  ctx.incremental(),
73
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
74
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
79
+ incrementalWhere,
80
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
75
81
  )}
76
- `;
82
+ `,
83
+ `
84
+ -- New a2 table (data from cutover onward)
85
+ select ${columns}
86
+ engaged_views,${restColumns}
87
+ from ${ctx.ref(t.database, t.schema, "p_playlist_combined_a2_" + t.suffix)}
88
+ ${ctx.when(
89
+ ctx.incremental(),
90
+ incrementalWhere,
91
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
92
+ )}
93
+ `,
94
+ ];
77
95
  })
78
96
  .join(" union all "),
79
97
  );
@@ -39,14 +39,12 @@ module.exports = (config) => {
39
39
  })
40
40
  .preOps((ctx) => {
41
41
  // Get pre_operations to find updated source partitions
42
- return `${helpers.staging_preops(ctx, config, "p_playlist_device_os_a1_")}`;
42
+ return `${helpers.staging_preops(ctx, config, ["p_playlist_device_os_a1_", "p_playlist_device_os_a2_"])}`;
43
43
  })
44
44
  .query((ctx) =>
45
45
  config.sources
46
- .map((t) => {
47
- return `
48
-
49
- select
46
+ .flatMap((t) => {
47
+ const columns = `
50
48
  _PARTITIONDATE as source_partition_date,
51
49
  parse_date('%Y%m%d', date) as data_date,
52
50
  "${t.schema}" as site_nm,
@@ -59,19 +57,38 @@ select
59
57
  country_code,
60
58
  device_type,
61
59
  operating_system,
62
- views,
60
+ views,`;
61
+ const restColumns = `
63
62
  watch_time_minutes,
64
63
  average_view_duration_seconds,
65
64
  playlist_starts,
66
65
  playlist_saves_added,
67
- playlist_saves_removed
66
+ playlist_saves_removed`;
67
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
68
+ return [
69
+ `
70
+ -- Deprecated a1 table (data before cutover)
71
+ select ${columns}
72
+ cast(null as int64) as engaged_views,${restColumns}
68
73
  from ${ctx.ref(t.database, t.schema, "p_playlist_device_os_a1_" + t.suffix)}
69
74
  ${ctx.when(
70
75
  ctx.incremental(),
71
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
72
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
76
+ incrementalWhere,
77
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
78
+ )}
79
+ `,
80
+ `
81
+ -- New a2 table (data from cutover onward)
82
+ select ${columns}
83
+ engaged_views,${restColumns}
84
+ from ${ctx.ref(t.database, t.schema, "p_playlist_device_os_a2_" + t.suffix)}
85
+ ${ctx.when(
86
+ ctx.incremental(),
87
+ incrementalWhere,
88
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
73
89
  )}
74
- `;
90
+ `,
91
+ ];
75
92
  })
76
93
  .join(" union all "),
77
94
  );
@@ -39,13 +39,12 @@ module.exports = (config) => {
39
39
  })
40
40
  .preOps((ctx) => {
41
41
  // Get pre_operations to find updated source partitions
42
- return `${helpers.staging_preops(ctx, config, "p_playlist_playback_location_a1_")}`;
42
+ return `${helpers.staging_preops(ctx, config, ["p_playlist_playback_location_a1_", "p_playlist_playback_location_a2_"])}`;
43
43
  })
44
44
  .query((ctx) =>
45
45
  config.sources
46
- .map((t) => {
47
- return `
48
- select
46
+ .flatMap((t) => {
47
+ const columns = `
49
48
  _PARTITIONDATE as source_partition_date,
50
49
  parse_date('%Y%m%d', date) as data_date,
51
50
  "${t.schema}" as site_nm,
@@ -58,19 +57,38 @@ select
58
57
  country_code,
59
58
  playback_location_type,
60
59
  playback_location_detail,
61
- views,
60
+ views,`;
61
+ const restColumns = `
62
62
  watch_time_minutes,
63
63
  average_view_duration_seconds,
64
64
  playlist_starts,
65
65
  playlist_saves_added,
66
- playlist_saves_removed
66
+ playlist_saves_removed`;
67
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
68
+ return [
69
+ `
70
+ -- Deprecated a1 table (data before cutover)
71
+ select ${columns}
72
+ cast(null as int64) as engaged_views,${restColumns}
67
73
  from ${ctx.ref(t.database, t.schema, "p_playlist_playback_location_a1_" + t.suffix)}
68
74
  ${ctx.when(
69
75
  ctx.incremental(),
70
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
71
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
76
+ incrementalWhere,
77
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
72
78
  )}
73
- `;
79
+ `,
80
+ `
81
+ -- New a2 table (data from cutover onward)
82
+ select ${columns}
83
+ engaged_views,${restColumns}
84
+ from ${ctx.ref(t.database, t.schema, "p_playlist_playback_location_a2_" + t.suffix)}
85
+ ${ctx.when(
86
+ ctx.incremental(),
87
+ incrementalWhere,
88
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
89
+ )}
90
+ `,
91
+ ];
74
92
  })
75
93
  .join(" union all "),
76
94
  );
@@ -40,17 +40,16 @@ module.exports = (config) => {
40
40
  })
41
41
  .preOps((ctx) => {
42
42
  // Get pre_operations to find updated source partitions
43
- return `${helpers.staging_preops(ctx, config, "p_playlist_province_a1_")}`;
43
+ return `${helpers.staging_preops(ctx, config, ["p_playlist_province_a1_", "p_playlist_province_a2_"])}`;
44
44
  })
45
45
  .query((ctx) =>
46
46
  config.sources
47
- .map((t) => {
48
- return `
49
- select
47
+ .flatMap((t) => {
48
+ const columns = `
50
49
  _PARTITIONDATE as source_partition_date,
51
50
  parse_date('%Y%m%d', date) as data_date,
52
51
  "${t.schema}" as site_nm,
53
- current_timestamp() as updated_at,
52
+ current_timestamp() as updated_at,
54
53
  channel_id,
55
54
  playlist_id,
56
55
  video_id,
@@ -58,19 +57,38 @@ select
58
57
  subscribed_status,
59
58
  country_code,
60
59
  province_code,
61
- views,
60
+ views,`;
61
+ const restColumns = `
62
62
  watch_time_minutes,
63
63
  average_view_duration_seconds,
64
64
  playlist_starts,
65
65
  playlist_saves_added,
66
- playlist_saves_removed
67
- from ${ctx.ref(t.database, t.schema, "p_playlist_province_a1_" + t.suffix)}
66
+ playlist_saves_removed`;
67
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
68
+ return [
69
+ `
70
+ -- Deprecated a1 table (data before cutover)
71
+ select ${columns}
72
+ cast(null as int64) as engaged_views,${restColumns}
73
+ from ${ctx.ref(t.database, t.schema, "p_playlist_province_a1_" + t.suffix)}
68
74
  ${ctx.when(
69
75
  ctx.incremental(),
70
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
71
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
76
+ incrementalWhere,
77
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
72
78
  )}
73
- `;
79
+ `,
80
+ `
81
+ -- New a2 table (data from cutover onward)
82
+ select ${columns}
83
+ engaged_views,${restColumns}
84
+ from ${ctx.ref(t.database, t.schema, "p_playlist_province_a2_" + t.suffix)}
85
+ ${ctx.when(
86
+ ctx.incremental(),
87
+ incrementalWhere,
88
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
89
+ )}
90
+ `,
91
+ ];
74
92
  })
75
93
  .join(" union all "),
76
94
  );
@@ -39,13 +39,12 @@ module.exports = (config) => {
39
39
  })
40
40
  .preOps((ctx) => {
41
41
  // Get pre_operations to find updated source partitions
42
- return `${helpers.staging_preops(ctx, config, "p_playlist_traffic_source_a1_")}`;
42
+ return `${helpers.staging_preops(ctx, config, ["p_playlist_traffic_source_a1_", "p_playlist_traffic_source_a2_"])}`;
43
43
  })
44
44
  .query((ctx) =>
45
45
  config.sources
46
- .map((t) => {
47
- return `
48
- select
46
+ .flatMap((t) => {
47
+ const columns = `
49
48
  _PARTITIONDATE as source_partition_date,
50
49
  parse_date('%Y%m%d', date) as data_date,
51
50
  "${t.schema}" as site_nm,
@@ -58,19 +57,38 @@ select
58
57
  country_code,
59
58
  traffic_source_type,
60
59
  traffic_source_detail,
61
- views,
60
+ views,`;
61
+ const restColumns = `
62
62
  watch_time_minutes,
63
63
  average_view_duration_seconds,
64
64
  playlist_starts,
65
65
  playlist_saves_added,
66
- playlist_saves_removed
66
+ playlist_saves_removed`;
67
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
68
+ return [
69
+ `
70
+ -- Deprecated a1 table (data before cutover)
71
+ select ${columns}
72
+ cast(null as int64) as engaged_views,${restColumns}
67
73
  from ${ctx.ref(t.database, t.schema, "p_playlist_traffic_source_a1_" + t.suffix)}
68
74
  ${ctx.when(
69
75
  ctx.incremental(),
70
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
71
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
76
+ incrementalWhere,
77
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
72
78
  )}
73
- `;
79
+ `,
80
+ `
81
+ -- New a2 table (data from cutover onward)
82
+ select ${columns}
83
+ engaged_views,${restColumns}
84
+ from ${ctx.ref(t.database, t.schema, "p_playlist_traffic_source_a2_" + t.suffix)}
85
+ ${ctx.when(
86
+ ctx.incremental(),
87
+ incrementalWhere,
88
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
89
+ )}
90
+ `,
91
+ ];
74
92
  })
75
93
  .join(" union all "),
76
94
  );
@@ -39,14 +39,12 @@ module.exports = (config) => {
39
39
  })
40
40
  .preOps((ctx) => {
41
41
  // Get pre_operations to find updated source partitions
42
- return `${helpers.staging_preops(ctx, config, "p_channel_playback_location_a2_")}`;
42
+ return `${helpers.staging_preops(ctx, config, ["p_channel_playback_location_a2_", "p_channel_playback_location_a3_"])}`;
43
43
  })
44
44
  .query((ctx) =>
45
45
  config.sources
46
- .map((t) => {
47
- return `
48
-
49
- select
46
+ .flatMap((t) => {
47
+ const columns = `
50
48
  _PARTITIONDATE as source_partition_date,
51
49
  parse_date('%Y%m%d', date) as data_date,
52
50
  "${t.schema}" as site_nm,
@@ -58,19 +56,38 @@ select
58
56
  country_code,
59
57
  playback_location_type,
60
58
  playback_location_detail,
61
- views,
59
+ views,`;
60
+ const restColumns = `
62
61
  watch_time_minutes,
63
62
  average_view_duration_seconds,
64
63
  average_view_duration_percentage,
65
64
  red_views,
66
- red_watch_time_minutes
65
+ red_watch_time_minutes`;
66
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
67
+ return [
68
+ `
69
+ -- Deprecated a2 table (data before cutover)
70
+ select ${columns}
71
+ cast(null as int64) as engaged_views,${restColumns}
67
72
  from ${ctx.ref(t.database, t.schema, "p_channel_playback_location_a2_" + t.suffix)}
68
73
  ${ctx.when(
69
74
  ctx.incremental(),
70
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
71
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
75
+ incrementalWhere,
76
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
77
+ )}
78
+ `,
79
+ `
80
+ -- New a3 table (data from cutover onward)
81
+ select ${columns}
82
+ engaged_views,${restColumns}
83
+ from ${ctx.ref(t.database, t.schema, "p_channel_playback_location_a3_" + t.suffix)}
84
+ ${ctx.when(
85
+ ctx.incremental(),
86
+ incrementalWhere,
87
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
72
88
  )}
73
- `;
89
+ `,
90
+ ];
74
91
  })
75
92
  .join(" union all "),
76
93
  );
@@ -40,13 +40,12 @@ module.exports = (config) => {
40
40
  })
41
41
  .preOps((ctx) => {
42
42
  // Get pre_operations to find updated source partitions
43
- return `${helpers.staging_preops(ctx, config, "p_channel_province_a2_")}`;
43
+ return `${helpers.staging_preops(ctx, config, ["p_channel_province_a2_", "p_channel_province_a3_"])}`;
44
44
  })
45
45
  .query((ctx) =>
46
46
  config.sources
47
- .map((t) => {
48
- return `
49
- select
47
+ .flatMap((t) => {
48
+ const columns = `
50
49
  _PARTITIONDATE as source_partition_date,
51
50
  parse_date('%Y%m%d', date) as data_date,
52
51
  "${t.schema}" as site_nm,
@@ -57,7 +56,8 @@ select
57
56
  subscribed_status,
58
57
  country_code,
59
58
  province_code,
60
- views,
59
+ views,`;
60
+ const restColumns = `
61
61
  watch_time_minutes,
62
62
  average_view_duration_seconds,
63
63
  average_view_duration_percentage,
@@ -75,14 +75,32 @@ select
75
75
  card_clicks,
76
76
  card_teaser_clicks,
77
77
  red_views,
78
- red_watch_time_minutes
79
- from ${ctx.ref(t.database, t.schema, "p_channel_province_a2_" + t.suffix)}
78
+ red_watch_time_minutes`;
79
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
80
+ return [
81
+ `
82
+ -- Deprecated a2 table (data before cutover)
83
+ select ${columns}
84
+ cast(null as int64) as engaged_views,${restColumns}
85
+ from ${ctx.ref(t.database, t.schema, "p_channel_province_a2_" + t.suffix)}
80
86
  ${ctx.when(
81
87
  ctx.incremental(),
82
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
83
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
88
+ incrementalWhere,
89
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
84
90
  )}
85
- `;
91
+ `,
92
+ `
93
+ -- New a3 table (data from cutover onward)
94
+ select ${columns}
95
+ engaged_views,${restColumns}
96
+ from ${ctx.ref(t.database, t.schema, "p_channel_province_a3_" + t.suffix)}
97
+ ${ctx.when(
98
+ ctx.incremental(),
99
+ incrementalWhere,
100
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
101
+ )}
102
+ `,
103
+ ];
86
104
  })
87
105
  .join(" union all "),
88
106
  );
@@ -39,15 +39,14 @@ module.exports = (config) => {
39
39
  })
40
40
  .preOps((ctx) => {
41
41
  // Get pre_operations to find updated source partitions
42
- return `${helpers.staging_preops(ctx, config, "p_channel_subtitles_a2_")}`;
42
+ return `${helpers.staging_preops(ctx, config, ["p_channel_subtitles_a2_", "p_channel_subtitles_a3_"])}`;
43
43
  })
44
44
  .query((ctx) =>
45
45
  config.sources
46
- .map((t) => {
47
- return `
48
- select
46
+ .flatMap((t) => {
47
+ const columns = `
49
48
  _PARTITIONDATE as source_partition_date,
50
- parse_date('%Y%m%d', date) as data_date,
49
+ parse_date('%Y%m%d', date) as data_date,
51
50
  "${t.schema}" as site_nm,
52
51
  current_timestamp() as updated_at,
53
52
  channel_id,
@@ -57,19 +56,38 @@ select
57
56
  country_code,
58
57
  subtitle_language,
59
58
  subtitle_language_autotranslated,
60
- views,
59
+ views,`;
60
+ const restColumns = `
61
61
  watch_time_minutes,
62
62
  average_view_duration_seconds,
63
63
  average_view_duration_percentage,
64
64
  red_views,
65
- red_watch_time_minutes
66
- from ${ctx.ref(t.database, t.schema, "p_channel_subtitles_a2_" + t.suffix)}
65
+ red_watch_time_minutes`;
66
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
67
+ return [
68
+ `
69
+ -- Deprecated a2 table (data before cutover)
70
+ select ${columns}
71
+ cast(null as int64) as engaged_views,${restColumns}
72
+ from ${ctx.ref(t.database, t.schema, "p_channel_subtitles_a2_" + t.suffix)}
67
73
  ${ctx.when(
68
74
  ctx.incremental(),
69
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
70
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
75
+ incrementalWhere,
76
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
71
77
  )}
72
- `;
78
+ `,
79
+ `
80
+ -- New a3 table (data from cutover onward)
81
+ select ${columns}
82
+ engaged_views,${restColumns}
83
+ from ${ctx.ref(t.database, t.schema, "p_channel_subtitles_a3_" + t.suffix)}
84
+ ${ctx.when(
85
+ ctx.incremental(),
86
+ incrementalWhere,
87
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
88
+ )}
89
+ `,
90
+ ];
73
91
  })
74
92
  .join(" union all "),
75
93
  );
@@ -39,13 +39,12 @@ module.exports = (config) => {
39
39
  })
40
40
  .preOps((ctx) => {
41
41
  // Get pre_operations to find updated source partitions
42
- return `${helpers.staging_preops(ctx, config, "p_channel_traffic_source_a2_")}`;
42
+ return `${helpers.staging_preops(ctx, config, ["p_channel_traffic_source_a2_", "p_channel_traffic_source_a3_"])}`;
43
43
  })
44
44
  .query((ctx) =>
45
45
  config.sources
46
- .map((t) => {
47
- return `
48
- select
46
+ .flatMap((t) => {
47
+ const columns = `
49
48
  _PARTITIONDATE as source_partition_date,
50
49
  parse_date('%Y%m%d', date) as data_date,
51
50
  "${t.schema}" as site_nm,
@@ -57,19 +56,38 @@ select
57
56
  country_code,
58
57
  traffic_source_type,
59
58
  traffic_source_detail,
60
- views,
59
+ views,`;
60
+ const restColumns = `
61
61
  watch_time_minutes,
62
62
  average_view_duration_seconds,
63
63
  average_view_duration_percentage,
64
64
  red_views,
65
- red_watch_time_minutes
65
+ red_watch_time_minutes`;
66
+ const incrementalWhere = `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`;
67
+ return [
68
+ `
69
+ -- Deprecated a2 table (data before cutover)
70
+ select ${columns}
71
+ cast(null as int64) as engaged_views,${restColumns}
66
72
  from ${ctx.ref(t.database, t.schema, "p_channel_traffic_source_a2_" + t.suffix)}
67
73
  ${ctx.when(
68
74
  ctx.incremental(),
69
- `where date(_PARTITIONTIME) in unnest((select partition_dates from unnest(partitions_to_update) where site_nm = "${t.schema}"))`,
70
- `where date(_PARTITIONTIME) > date(self_checkpoint)`,
75
+ incrementalWhere,
76
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) < date '2025-09-22'`,
71
77
  )}
72
- `;
78
+ `,
79
+ `
80
+ -- New a3 table (data from cutover onward)
81
+ select ${columns}
82
+ engaged_views,${restColumns}
83
+ from ${ctx.ref(t.database, t.schema, "p_channel_traffic_source_a3_" + t.suffix)}
84
+ ${ctx.when(
85
+ ctx.incremental(),
86
+ incrementalWhere,
87
+ `where date(_PARTITIONTIME) > date(self_checkpoint) and date(_PARTITIONTIME) >= date '2025-09-22'`,
88
+ )}
89
+ `,
90
+ ];
73
91
  })
74
92
  .join(" union all "),
75
93
  );
package/index.js CHANGED
@@ -107,12 +107,13 @@ module.exports = (config) => {
107
107
  };
108
108
 
109
109
  config.sources.map((s) => {
110
+ // Channel reports - deprecated a2 tables (data before 2025-09-22)
110
111
  declare({
111
112
  database: s.database,
112
113
  schema: s.schema,
113
114
  name: "p_channel_basic_a2_" + s.suffix,
114
115
  description:
115
- "This report provides user activity statistics related to a channel and its videos.",
116
+ "This report provides user activity statistics related to a channel and its videos. (Deprecated: replaced by a3)",
116
117
  });
117
118
 
118
119
  declare({
@@ -120,7 +121,7 @@ module.exports = (config) => {
120
121
  schema: s.schema,
121
122
  name: "p_channel_combined_a2_" + s.suffix,
122
123
  description:
123
- "This report provides user activity statistics related to a channel and its videos.",
124
+ "This report provides user activity statistics related to a channel and its videos. (Deprecated: replaced by a3)",
124
125
  });
125
126
 
126
127
  declare({
@@ -128,7 +129,7 @@ module.exports = (config) => {
128
129
  schema: s.schema,
129
130
  name: "p_channel_province_a2_" + s.suffix,
130
131
  description:
131
- "This report provides user activity statistics for U.S. states and the District of Columbia.",
132
+ "This report provides user activity statistics for U.S. states and the District of Columbia. (Deprecated: replaced by a3)",
132
133
  });
133
134
 
134
135
  declare({
@@ -136,7 +137,7 @@ module.exports = (config) => {
136
137
  schema: s.schema,
137
138
  name: "p_channel_playback_location_a2_" + s.suffix,
138
139
  description:
139
- "This report provides statistics related to the type of page or application where video playbacks occurred.",
140
+ "This report provides statistics related to the type of page or application where video playbacks occurred. (Deprecated: replaced by a3)",
140
141
  });
141
142
 
142
143
  declare({
@@ -144,17 +145,83 @@ module.exports = (config) => {
144
145
  schema: s.schema,
145
146
  name: "p_channel_traffic_source_a2_" + s.suffix,
146
147
  description:
147
- "This report aggregates viewing statistics based on the manner in which viewers reached the channel's video content.",
148
+ "This report aggregates viewing statistics based on the manner in which viewers reached the channel's video content. (Deprecated: replaced by a3)",
148
149
  });
149
150
 
150
151
  declare({
151
152
  database: s.database,
152
153
  schema: s.schema,
153
154
  name: "p_channel_device_os_a2_" + s.suffix,
155
+ description:
156
+ "This report aggregates video viewing statistics based on viewers' operating systems and device types. (Deprecated: replaced by a3)",
157
+ });
158
+
159
+ declare({
160
+ database: s.database,
161
+ schema: s.schema,
162
+ name: "p_channel_subtitles_a2_" + s.suffix,
163
+ description:
164
+ "This report provides statistics about the closed caption language usage during video views. (Deprecated: replaced by a3)",
165
+ });
166
+
167
+ // Channel reports - new a3 tables (data from 2025-09-22 onward)
168
+ declare({
169
+ database: s.database,
170
+ schema: s.schema,
171
+ name: "p_channel_basic_a3_" + s.suffix,
172
+ description:
173
+ "This report provides user activity statistics related to a channel and its videos.",
174
+ });
175
+
176
+ declare({
177
+ database: s.database,
178
+ schema: s.schema,
179
+ name: "p_channel_combined_a3_" + s.suffix,
180
+ description:
181
+ "This report provides user activity statistics related to a channel and its videos.",
182
+ });
183
+
184
+ declare({
185
+ database: s.database,
186
+ schema: s.schema,
187
+ name: "p_channel_province_a3_" + s.suffix,
188
+ description:
189
+ "This report provides user activity statistics for U.S. states and the District of Columbia.",
190
+ });
191
+
192
+ declare({
193
+ database: s.database,
194
+ schema: s.schema,
195
+ name: "p_channel_playback_location_a3_" + s.suffix,
196
+ description:
197
+ "This report provides statistics related to the type of page or application where video playbacks occurred.",
198
+ });
199
+
200
+ declare({
201
+ database: s.database,
202
+ schema: s.schema,
203
+ name: "p_channel_traffic_source_a3_" + s.suffix,
204
+ description:
205
+ "This report aggregates viewing statistics based on the manner in which viewers reached the channel's video content.",
206
+ });
207
+
208
+ declare({
209
+ database: s.database,
210
+ schema: s.schema,
211
+ name: "p_channel_device_os_a3_" + s.suffix,
154
212
  description:
155
213
  "This report aggregates video viewing statistics based on viewers' operating systems and device types.",
156
214
  });
157
215
 
216
+ declare({
217
+ database: s.database,
218
+ schema: s.schema,
219
+ name: "p_channel_subtitles_a3_" + s.suffix,
220
+ description:
221
+ "This report provides statistics about the closed caption language usage during video views.",
222
+ });
223
+
224
+ // Channel reports - unchanged (no version bump)
158
225
  declare({
159
226
  database: s.database,
160
227
  schema: s.schema,
@@ -195,18 +262,60 @@ module.exports = (config) => {
195
262
  "This report provides impressions and click-through statistics for end screen elements that display during a channel's videos.",
196
263
  });
197
264
 
265
+ // Playlist reports - deprecated a1 tables (data before 2025-09-22)
198
266
  declare({
199
267
  database: s.database,
200
268
  schema: s.schema,
201
- name: "p_channel_subtitles_a2_" + s.suffix,
269
+ name: "p_playlist_basic_a1_" + s.suffix,
202
270
  description:
203
- "This report provides statistics about the closed caption language usage during video views.",
271
+ "This report provides statistics related to users' interactions with a channel's playlists. (Deprecated: replaced by a2)",
204
272
  });
205
273
 
206
274
  declare({
207
275
  database: s.database,
208
276
  schema: s.schema,
209
- name: "p_playlist_basic_a1_" + s.suffix,
277
+ name: "p_playlist_device_os_a1_" + s.suffix,
278
+ description:
279
+ "This report aggregates playlist viewing statistics based on viewers' operating systems and device types. (Deprecated: replaced by a2)",
280
+ });
281
+
282
+ declare({
283
+ database: s.database,
284
+ schema: s.schema,
285
+ name: "p_playlist_province_a1_" + s.suffix,
286
+ description:
287
+ "This report provides user activity statistics related to users' interactions with a channel's playlists for U.S. states and the District of Columbia. (Deprecated: replaced by a2)",
288
+ });
289
+
290
+ declare({
291
+ database: s.database,
292
+ schema: s.schema,
293
+ name: "p_playlist_playback_location_a1_" + s.suffix,
294
+ description:
295
+ "This report provides statistics related to the type of page or application where playlist playbacks occurred. (Deprecated: replaced by a2)",
296
+ });
297
+
298
+ declare({
299
+ database: s.database,
300
+ schema: s.schema,
301
+ name: "p_playlist_traffic_source_a1_" + s.suffix,
302
+ description:
303
+ "This report aggregates viewing statistics based on the manner in which viewers reached a channel's playlist videos. (Deprecated: replaced by a2)",
304
+ });
305
+
306
+ declare({
307
+ database: s.database,
308
+ schema: s.schema,
309
+ name: "p_playlist_combined_a1_" + s.suffix,
310
+ description:
311
+ "This report provides fine-grained playlist statistics by combining dimensions used in the playback location, traffic source, and device/OS reports. (Deprecated: replaced by a2)",
312
+ });
313
+
314
+ // Playlist reports - new a2 tables (data from 2025-09-22 onward)
315
+ declare({
316
+ database: s.database,
317
+ schema: s.schema,
318
+ name: "p_playlist_basic_a2_" + s.suffix,
210
319
  description:
211
320
  "This report provides statistics related to users' interactions with a channel's playlists.",
212
321
  });
@@ -214,7 +323,7 @@ module.exports = (config) => {
214
323
  declare({
215
324
  database: s.database,
216
325
  schema: s.schema,
217
- name: "p_playlist_device_os_a1_" + s.suffix,
326
+ name: "p_playlist_device_os_a2_" + s.suffix,
218
327
  description:
219
328
  "This report aggregates playlist viewing statistics based on viewers' operating systems and device types.",
220
329
  });
@@ -222,7 +331,7 @@ module.exports = (config) => {
222
331
  declare({
223
332
  database: s.database,
224
333
  schema: s.schema,
225
- name: "p_playlist_province_a1_" + s.suffix,
334
+ name: "p_playlist_province_a2_" + s.suffix,
226
335
  description:
227
336
  "This report provides user activity statistics related to users' interactions with a channel's playlists for U.S. states and the District of Columbia.",
228
337
  });
@@ -230,7 +339,7 @@ module.exports = (config) => {
230
339
  declare({
231
340
  database: s.database,
232
341
  schema: s.schema,
233
- name: "p_playlist_playback_location_a1_" + s.suffix,
342
+ name: "p_playlist_playback_location_a2_" + s.suffix,
234
343
  description:
235
344
  "This report provides statistics related to the type of page or application where playlist playbacks occurred.",
236
345
  });
@@ -238,7 +347,7 @@ module.exports = (config) => {
238
347
  declare({
239
348
  database: s.database,
240
349
  schema: s.schema,
241
- name: "p_playlist_traffic_source_a1_" + s.suffix,
350
+ name: "p_playlist_traffic_source_a2_" + s.suffix,
242
351
  description:
243
352
  "This report aggregates viewing statistics based on the manner in which viewers reached a channel's playlist videos.",
244
353
  });
@@ -246,7 +355,7 @@ module.exports = (config) => {
246
355
  declare({
247
356
  database: s.database,
248
357
  schema: s.schema,
249
- name: "p_playlist_combined_a1_" + s.suffix,
358
+ name: "p_playlist_combined_a2_" + s.suffix,
250
359
  description:
251
360
  "This report provides fine-grained playlist statistics by combining dimensions used in the playback location, traffic source, and device/OS reports.",
252
361
  });
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "@ken-e/dataform-youtube",
3
3
  "dependencies": {
4
4
  "@dataform/core": "3.0.9",
5
- "@ken-e/dataform-helpers": "0.0.9"
5
+ "@ken-e/dataform-helpers": "0.0.11"
6
6
  },
7
7
  "devDependencies": {
8
8
  "@eslint/js": "^9.18.0",
@@ -11,7 +11,7 @@
11
11
  "globals": "^15.14.0",
12
12
  "prettier": "3.4.2"
13
13
  },
14
- "version": "0.0.9",
14
+ "version": "0.0.11",
15
15
  "description": "A dataform package to process Youtube data from the BQ Data Transfer.",
16
16
  "main": "index.js",
17
17
  "scripts": {