make-mp-data 2.1.11 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +31 -0
  2. package/dungeons/adspend.js +35 -1
  3. package/dungeons/anon.js +25 -1
  4. package/dungeons/array-of-object-lookup.js +201 -0
  5. package/dungeons/benchmark-heavy.js +241 -0
  6. package/dungeons/benchmark-light.js +141 -0
  7. package/dungeons/big.js +10 -9
  8. package/dungeons/business.js +60 -12
  9. package/dungeons/complex.js +35 -1
  10. package/dungeons/copilot.js +383 -0
  11. package/dungeons/education.js +1005 -0
  12. package/dungeons/experiments.js +18 -4
  13. package/dungeons/fintech.js +976 -0
  14. package/dungeons/foobar.js +32 -0
  15. package/dungeons/food.js +988 -0
  16. package/dungeons/funnels.js +38 -1
  17. package/dungeons/gaming.js +26 -5
  18. package/dungeons/media.js +861 -270
  19. package/dungeons/mil.js +31 -3
  20. package/dungeons/mirror.js +33 -1
  21. package/dungeons/retention-cadence.js +211 -0
  22. package/dungeons/rpg.js +1178 -0
  23. package/dungeons/sanity.js +32 -2
  24. package/dungeons/sass.js +923 -0
  25. package/dungeons/scd.js +47 -1
  26. package/dungeons/simple.js +29 -14
  27. package/dungeons/social.js +928 -0
  28. package/dungeons/streaming.js +373 -0
  29. package/dungeons/strict-event-test.js +30 -0
  30. package/dungeons/student-teacher.js +19 -5
  31. package/dungeons/text-generation.js +120 -84
  32. package/dungeons/too-big-events.js +203 -0
  33. package/dungeons/{userAgent.js → user-agent.js} +23 -2
  34. package/entry.js +5 -4
  35. package/index.js +41 -54
  36. package/lib/core/config-validator.js +122 -7
  37. package/lib/core/context.js +7 -14
  38. package/lib/core/storage.js +57 -25
  39. package/lib/generators/adspend.js +12 -12
  40. package/lib/generators/events.js +6 -5
  41. package/lib/generators/funnels.js +32 -10
  42. package/lib/generators/product-lookup.js +262 -0
  43. package/lib/generators/product-names.js +195 -0
  44. package/lib/generators/profiles.js +3 -3
  45. package/lib/generators/scd.js +13 -3
  46. package/lib/generators/text.js +17 -4
  47. package/lib/orchestrators/mixpanel-sender.js +244 -204
  48. package/lib/orchestrators/user-loop.js +54 -16
  49. package/lib/templates/phrases.js +473 -16
  50. package/lib/templates/schema.d.ts +173 -0
  51. package/lib/templates/verbose-schema.js +140 -206
  52. package/lib/utils/chart.js +210 -0
  53. package/lib/utils/function-registry.js +285 -0
  54. package/lib/utils/json-evaluator.js +172 -0
  55. package/lib/utils/logger.js +34 -0
  56. package/lib/utils/utils.js +41 -4
  57. package/package.json +12 -21
  58. package/types.d.ts +15 -5
  59. package/dungeons/ai-chat-analytics-ed.js +0 -274
  60. package/dungeons/money2020-ed-also.js +0 -277
  61. package/dungeons/money2020-ed.js +0 -579
  62. package/lib/generators/text-bak-old.js +0 -1121
  63. package/lib/orchestrators/worker-manager.js +0 -203
  64. package/lib/templates/hooks-instructions.txt +0 -434
  65. package/lib/templates/phrases-bak.js +0 -925
  66. package/lib/templates/prompt (old).txt +0 -98
  67. package/lib/templates/schema-instructions.txt +0 -155
  68. package/lib/templates/scratch-dungeon-template.js +0 -116
  69. package/lib/templates/textQuickTest.js +0 -172
  70. package/lib/utils/ai.js +0 -120
  71. package/lib/utils/project.js +0 -166
package/dungeons/media.js CHANGED
@@ -1,40 +1,87 @@
1
+ import dayjs from "dayjs";
2
+ import utc from "dayjs/plugin/utc.js";
3
+ import "dotenv/config";
4
+ import * as u from "../lib/utils/utils.js";
5
+ import * as v from "ak-tools";
1
6
 
2
- const SEED = "my-seed";
3
- import dayjs from 'dayjs';
4
- import utc from 'dayjs/plugin/utc.js';
7
+ const SEED = "harness-media";
5
8
  dayjs.extend(utc);
6
- import 'dotenv/config';
7
- import * as u from '../lib/utils/utils.js';
8
- import * as v from 'ak-tools';
9
9
  const chance = u.initChance(SEED);
10
- const num_users = 10_000;
11
- const days = 125;
10
+ const num_users = 5_000;
11
+ const days = 100;
12
12
 
13
- /** @typedef {import("../types.js").Dungeon} Config */
13
+ /** @typedef {import("../../types.js").Dungeon} Config */
14
14
 
15
- function genIds(numIds = 1000) {
16
- const ids = [];
17
- for (let i = 0; i < numIds; i++) {
18
- ids.push(v.uid());
19
- }
20
- return ids;
21
- }
15
+ /**
16
+ * STREAMVAULT - Video Streaming Platform Analytics
17
+ *
18
+ * StreamVault is a Netflix/Hulu-style video streaming platform where users browse a rich
19
+ * catalog of movies, series, documentaries, and specials. Users manage watchlists, watch
20
+ * content with configurable playback options, rate and share content, and manage family
21
+ * profiles under a single account. Monetization runs through tiered subscriptions:
22
+ * Free (ad-supported), Standard (ad-free HD), and Premium (4K, offline downloads, 5 profiles).
23
+ *
24
+ * CONTENT DISCOVERY:
25
+ * Users land on a personalized home screen with sections like "Continue Watching",
26
+ * "Trending Now", "New Releases", and genre-based rows. A recommendation engine (using
27
+ * collaborative filtering, content-based, trending, and editorial algorithms) surfaces
28
+ * relevant content. Users can also search by title, actor, director, or genre. The browse
29
+ * and search patterns reveal how users navigate the catalog and which discovery paths
30
+ * lead to actual viewership.
31
+ *
32
+ * PLAYBACK EXPERIENCE:
33
+ * Once content is selected, playback begins with quality auto-selected based on connection
34
+ * speed (480p to 4K). Users can adjust playback speed, toggle subtitles in multiple
35
+ * languages, and pause/resume. Playback completion percentage and watch duration are
36
+ * tracked to understand engagement depth. Some users exhibit "binge-watching" behavior,
37
+ * consuming multiple episodes consecutively with high completion rates and minimal pausing.
38
+ *
39
+ * PROFILE MANAGEMENT:
40
+ * A single account supports multiple profiles: main, kids, partner, and guest. The kids
41
+ * profile has content restrictions (only animation and documentaries, no ads). Profile
42
+ * switching events reveal household composition and viewing patterns across family members.
43
+ *
44
+ * MONETIZATION MODEL:
45
+ * - Free tier: Ad-supported with pre-roll, mid-roll, banner, and interstitial ads. Users
46
+ * on this tier experience ad fatigue over time, which drives churn or upgrades.
47
+ * - Standard tier ($9.99/mo): Ad-free viewing in HD, 2 simultaneous streams.
48
+ * - Premium tier ($14.99/mo): 4K streaming, offline downloads, 5 profiles, early access.
49
+ * - Subscription changes (upgrades, downgrades, cancellations, resubscriptions) are tracked
50
+ * with reasons to understand the lifecycle of subscriber value.
51
+ *
52
+ * CONTENT ENGAGEMENT:
53
+ * Users rate content (1-5 stars with optional review text), add/remove items from their
54
+ * watchlist, share content via link/social/DM/email, and download content for offline
55
+ * viewing. These engagement signals feed back into the recommendation engine and reveal
56
+ * content quality and user satisfaction patterns.
57
+ *
58
+ * WHY THESE EVENTS/PROPERTIES?
59
+ * - Events model the full streaming lifecycle: onboarding -> discovery -> consumption -> engagement -> monetization
60
+ * - Properties enable cohort analysis: subscription tier, device type, genre preference, viewing patterns
61
+ * - Funnels reveal friction: where do users drop off between browsing, selecting, starting, and finishing content?
62
+ * - The recommendation engine creates measurable A/B-testable discovery paths
63
+ * - Ad impression tracking enables fatigue analysis and churn prediction for free-tier users
64
+ * - Profile switching reveals household dynamics and kids safety patterns
65
+ * - The 8 "needle in haystack" hooks simulate real product insights hidden in production data
66
+ */
22
67
 
23
- const videoIds = genIds();
24
- const channelIds = genIds(100);
68
+ // Generate consistent content IDs for lookup tables and events
69
+ const contentIds = v.range(1, 501).map(n => `content_${v.uid(8)}`);
70
+ const blockbusterId = `blockbuster_${v.uid(8)}`;
25
71
 
26
72
  /** @type {Config} */
27
73
  const config = {
28
74
  token: "",
29
- seed: `LFG!`, //,
75
+ seed: SEED,
30
76
  numDays: days,
31
- numEvents: num_users * 63,
77
+ numEvents: num_users * 120,
32
78
  numUsers: num_users,
33
79
  hasAnonIds: false,
34
- hasSessionIds: false,
80
+ hasSessionIds: true,
35
81
  format: "json",
82
+ gzip: true,
36
83
  alsoInferFunnels: false,
37
- hasLocation: false,
84
+ hasLocation: true,
38
85
  hasAndroidDevices: true,
39
86
  hasIOSDevices: true,
40
87
  hasDesktopDevices: true,
@@ -42,325 +89,544 @@ const config = {
42
89
  hasCampaigns: false,
43
90
  isAnonymous: false,
44
91
  hasAdSpend: false,
45
-
46
- hasAvatar: false,
47
-
48
- batchSize: 1_500_000,
92
+ percentUsersBornInDataset: 50,
93
+ hasAvatar: true,
94
+ makeChart: false,
95
+ batchSize: 2_500_000,
49
96
  concurrency: 10,
50
- writeToDisk: true,
97
+ writeToDisk: false,
51
98
 
52
- funnels: [],
99
+ funnels: [
100
+ {
101
+ sequence: ["account created", "content browsed", "playback started"],
102
+ isFirstFunnel: true,
103
+ conversionRate: 80,
104
+ timeToConvert: 0.25,
105
+ },
106
+ {
107
+ // Core viewing loop: browse → select → watch → finish (most common)
108
+ sequence: ["content browsed", "content selected", "playback started", "playback completed"],
109
+ conversionRate: 55,
110
+ timeToConvert: 2,
111
+ weight: 5,
112
+ },
113
+ {
114
+ // Recommendation-driven viewing
115
+ sequence: ["recommendation clicked", "playback started", "playback completed", "content rated"],
116
+ conversionRate: 35,
117
+ timeToConvert: 1,
118
+ weight: 3,
119
+ },
120
+ {
121
+ // Search-driven discovery
122
+ sequence: ["search performed", "content selected", "playback started"],
123
+ conversionRate: 50,
124
+ timeToConvert: 0.5,
125
+ weight: 3,
126
+ },
127
+ {
128
+ // Watchlist management
129
+ sequence: ["content browsed", "watchlist added", "content selected", "playback started"],
130
+ conversionRate: 40,
131
+ timeToConvert: 12,
132
+ weight: 2,
133
+ },
134
+ {
135
+ // Profile and subtitle management
136
+ sequence: ["profile switched", "subtitle toggled", "playback started", "playback completed"],
137
+ conversionRate: 45,
138
+ timeToConvert: 1,
139
+ weight: 2,
140
+ },
141
+ {
142
+ // Ad experience (free tier)
143
+ sequence: ["ad impression", "playback started", "playback paused"],
144
+ conversionRate: 60,
145
+ timeToConvert: 0.5,
146
+ weight: 2,
147
+ },
148
+ {
149
+ // Content sharing and downloads
150
+ sequence: ["playback completed", "share content", "download started"],
151
+ conversionRate: 25,
152
+ timeToConvert: 1,
153
+ weight: 1,
154
+ },
155
+ {
156
+ // Subscription changes
157
+ sequence: ["content browsed", "subscription changed"],
158
+ conversionRate: 15,
159
+ timeToConvert: 24,
160
+ weight: 1,
161
+ },
162
+ ],
53
163
 
54
164
  events: [
55
165
  {
56
- event: "watch video",
57
- weight: 55,
166
+ event: "account created",
167
+ weight: 1,
168
+ isFirstEvent: true,
169
+ properties: {
170
+ "signup_source": u.pickAWinner(["organic", "referral", "trial_offer", "ad"]),
171
+ "plan_selected": u.pickAWinner(["free", "standard", "premium"]),
172
+ }
173
+ },
174
+ {
175
+ event: "content browsed",
176
+ weight: 20,
177
+ properties: {
178
+ "browse_section": u.pickAWinner(["home", "trending", "new_releases", "genre", "continue_watching"]),
179
+ "genre": u.pickAWinner(["action", "comedy", "drama", "documentary", "horror", "sci_fi", "animation", "thriller", "romance"]),
180
+ }
181
+ },
182
+ {
183
+ event: "content selected",
184
+ weight: 15,
185
+ properties: {
186
+ "content_type": u.pickAWinner(["movie", "series", "documentary", "special"]),
187
+ "genre": u.pickAWinner(["action", "comedy", "drama", "documentary", "horror", "sci_fi", "animation", "thriller", "romance"]),
188
+ "content_id": u.pickAWinner(contentIds),
189
+ }
190
+ },
191
+ {
192
+ event: "playback started",
193
+ weight: 18,
58
194
  properties: {
59
- video_id: u.pickAWinner(videoIds),
60
- "watch percent": u.pickAWinner([
61
- 25,
62
- 50,
63
- 75,
64
- 100,
65
- ]),
66
- "watch time": u.weighNumRange(1, 65, .89, 100),
67
-
68
- "category": u.pickAWinner([
69
- "comedy",
70
- "educational",
71
- "music",
72
- "sports",
73
- "news",
74
- "gaming",
75
- "travel",
76
- ]),
77
- quality: u.pickAWinner([
78
- "240p",
79
- "360p",
80
- "480p",
81
- "720p",
82
- "1080p",
83
- "4k",
84
- ], 4),
85
- autoplay: [
86
- true,
87
- false,
88
- ],
89
- fullscreen: [
90
- true,
91
- false,
92
- ],
93
- "ads?": [
94
- true, true,
95
- false,
96
- ],
97
- },
195
+ "content_id": u.pickAWinner(contentIds),
196
+ "content_type": u.pickAWinner(["movie", "series", "documentary", "special"]),
197
+ "playback_quality": u.pickAWinner(["480p", "720p", "1080p", "4k"]),
198
+ "subtitle_language": u.pickAWinner(["none", "english", "spanish", "french", "japanese", "korean"], 0),
199
+ "playback_speed": u.pickAWinner(["0.5x", "1x", "1x", "1x", "1.25x", "1.5x", "2x"]),
200
+ }
201
+ },
202
+ {
203
+ event: "playback completed",
204
+ weight: 12,
205
+ properties: {
206
+ "content_id": u.pickAWinner(contentIds),
207
+ "content_type": u.pickAWinner(["movie", "series", "documentary", "special"]),
208
+ "watch_duration_min": u.weighNumRange(5, 180, 0.5, 45),
209
+ "completion_percent": u.weighNumRange(10, 100, 1.5, 85),
210
+ }
98
211
  },
99
212
  {
100
- event: "like",
213
+ event: "playback paused",
101
214
  weight: 10,
102
215
  properties: {
103
- video_id: u.pickAWinner(videoIds),
104
- },
216
+ "content_id": u.pickAWinner(contentIds),
217
+ "pause_reason": u.pickAWinner(["manual", "ad_break", "buffering", "notification"]),
218
+ }
105
219
  },
106
220
  {
107
- event: "comment",
108
- weight: 5,
221
+ event: "content rated",
222
+ weight: 6,
109
223
  properties: {
110
- video_id: u.pickAWinner(videoIds),
111
- comment_length: [
112
- "short",
113
- "medium",
114
- "long",
115
- ],
116
- },
224
+ "content_id": u.pickAWinner(contentIds),
225
+ "rating": u.weighNumRange(1, 5, 2, 4),
226
+ "review_text_length": u.weighNumRange(0, 500, 0.2, 0),
227
+ }
117
228
  },
118
229
  {
119
- event: "share",
120
- weight: 3,
230
+ event: "watchlist added",
231
+ weight: 8,
121
232
  properties: {
122
- video_id: u.pickAWinner(videoIds),
123
- "share network": u.pickAWinner([
124
- "facebook",
125
- "twitter",
126
- "reddit",
127
- "email",
128
- "whatsapp",
129
- ]),
130
- },
233
+ "content_id": u.pickAWinner(contentIds),
234
+ "content_type": u.pickAWinner(["movie", "series", "documentary", "special"]),
235
+ "genre": u.pickAWinner(["action", "comedy", "drama", "documentary", "horror", "sci_fi", "animation", "thriller", "romance"]),
236
+ }
131
237
  },
132
238
  {
133
- event: "search",
134
- weight: 25,
239
+ event: "watchlist removed",
240
+ weight: 3,
135
241
  properties: {
136
- search_term: [
137
- "cats",
138
- "dogs",
139
- "tutorial",
140
- "news",
141
- "music",
142
- ],
143
- "results count": u.pickAWinner([
144
- 0,
145
- 1,
146
- 2,
147
- 3,
148
- 4,
149
- 5,
150
- 6, 7, 8, 9, 10
151
- ], 5),
152
- "search category": [
153
- "all",
154
- "channels",
155
- "playlists",
156
- ],
157
- },
242
+ "content_id": u.pickAWinner(contentIds),
243
+ "reason": u.pickAWinner(["watched", "not_interested", "expired"]),
244
+ }
158
245
  },
159
246
  {
160
- event: "subscribe",
247
+ event: "search performed",
161
248
  weight: 7,
162
249
  properties: {
163
- channel_id: u.pickAWinner(genIds()),
164
- },
250
+ "search_term": () => chance.word(),
251
+ "results_count": u.weighNumRange(0, 50, 0.5, 15),
252
+ "search_type": u.pickAWinner(["title", "actor", "director", "genre"]),
253
+ }
165
254
  },
166
255
  {
167
- event: "unsubscribe",
168
- weight: 2,
256
+ event: "recommendation clicked",
257
+ weight: 9,
169
258
  properties: {
170
- channel_id: u.pickAWinner(genIds()),
171
- },
259
+ "algorithm": u.pickAWinner(["collaborative_filtering", "content_based", "trending", "editorial"]),
260
+ "position": u.weighNumRange(1, 20),
261
+ }
172
262
  },
173
263
  {
174
- event: "create playlist",
264
+ event: "profile switched",
175
265
  weight: 4,
176
266
  properties: {
177
- "play list name": u.pickAWinner([
178
- "favorites",
179
- "watch later",
180
- "my music",
181
- "funny videos",
182
- "educational",
183
- ]),
184
- privacy: u.pickAWinner([
185
- "public",
186
- "private",
187
- "unlisted",
188
- ]),
189
- },
267
+ "profile_type": u.pickAWinner(["main", "kids", "partner", "guest"]),
268
+ }
190
269
  },
191
270
  {
192
- event: "account signup",
193
- weight: 1,
194
- isFirstEvent: true,
271
+ event: "ad impression",
272
+ weight: 8,
195
273
  properties: {
196
- "sign up method": [
197
- "email",
198
- "google",
199
- "facebook",
200
- ],
201
- },
274
+ "ad_type": u.pickAWinner(["pre_roll", "mid_roll", "banner", "interstitial"]),
275
+ "ad_duration_sec": u.weighNumRange(5, 30),
276
+ "skipped": u.pickAWinner([true, false], 0.4),
277
+ }
202
278
  },
203
279
  {
204
- event: "account login",
205
- weight: 9,
280
+ event: "subscription changed",
281
+ weight: 2,
206
282
  properties: {
207
- "log in method": u.pickAWinner([
208
- "email",
209
- "google",
210
- "facebook",
211
- ]),
212
- success: [
213
- true,
214
- false,
215
- ],
216
- error_message: [
217
- "incorrect password",
218
- "user not found",
219
- "account locked",
220
- ],
221
- },
283
+ "old_plan": u.pickAWinner(["free", "standard", "premium"]),
284
+ "new_plan": u.pickAWinner(["free", "standard", "premium"]),
285
+ "change_reason": u.pickAWinner(["upgrade", "downgrade", "cancel", "resubscribe"]),
286
+ }
222
287
  },
223
288
  {
224
- event: "$experiment_started",
289
+ event: "download started",
225
290
  weight: 5,
226
- isSessionStartEvent: true,
227
291
  properties: {
228
- "$experiment_type": "ak_ad_hoc",
229
- "Experiment name": "show results on empty search",
230
- "Variant name": ["feature enabled", "feature disabled"],
292
+ "content_id": u.pickAWinner(contentIds),
293
+ "content_type": u.pickAWinner(["movie", "series", "documentary", "special"]),
294
+ "download_quality": u.pickAWinner(["720p", "1080p", "4k"]),
231
295
  }
232
- }
296
+ },
297
+ {
298
+ event: "share content",
299
+ weight: 3,
300
+ properties: {
301
+ "share_method": u.pickAWinner(["link", "social", "dm", "email"]),
302
+ "content_type": u.pickAWinner(["movie", "series", "documentary", "special"]),
303
+ }
304
+ },
305
+ {
306
+ event: "subtitle toggled",
307
+ weight: 4,
308
+ properties: {
309
+ "subtitle_language": u.pickAWinner(["none", "english", "spanish", "french", "japanese", "korean"], 0),
310
+ "action": u.pickAWinner(["enabled", "disabled", "changed"]),
311
+ }
312
+ },
233
313
  ],
314
+
234
315
  superProps: {
235
- platform: u.pickAWinner([
236
- "web",
237
- "ios",
238
- "android",
239
- ]),
240
- network_type: [
241
- "wifi",
242
- "cellular",
243
- ],
316
+ subscription_plan: u.pickAWinner(["free", "free", "standard", "standard", "standard", "premium"]),
317
+ device_type: u.pickAWinner(["smart_tv", "mobile", "tablet", "laptop", "desktop"]),
244
318
  },
319
+
320
+ scdProps: {},
321
+
245
322
  userProps: {
246
- subscription_status: [
247
- "free",
248
- "free",
249
- "premium",
250
- ],
251
- age_range: [
252
- "13-17",
253
- "18-24",
254
- "25-34",
255
- "35-44",
256
- "45-54",
257
- "55+",
258
- ],
259
- preferred_genre: u.pickAWinner([
260
- "comedy",
261
- "action",
262
- "drama",
263
- "sci-fi",
264
- "horror",
265
- ]),
266
- upload_count: [
267
- 0,
268
- 1,
269
- 5,
270
- 10,
271
- 20,
272
- ],
273
- following_count: [
274
- 0,
275
- 10,
276
- 50,
277
- 100,
278
- 500,
279
- ],
280
- dark_mode_enabled: [
281
- true,
282
- false,
283
- ],
323
+ "preferred_genre": u.pickAWinner(["action", "comedy", "drama", "documentary", "horror", "sci_fi", "animation"]),
324
+ "avg_session_duration_min": u.weighNumRange(10, 180, 0.5, 45),
325
+ "total_watch_hours": u.weighNumRange(0, 500, 0.8, 50),
326
+ "profiles_count": u.weighNumRange(1, 5),
327
+ "downloads_enabled": u.pickAWinner([true, false], 0.4),
284
328
  },
285
329
 
286
- scdProps: {},
287
- mirrorProps: {},
288
- groupKeys: [],
289
- groupProps: {},
290
330
  lookupTables: [],
331
+
332
+ /**
333
+ * ARCHITECTED ANALYTICS HOOKS
334
+ *
335
+ * This hook function creates 8 deliberate patterns in the data:
336
+ *
337
+ * 1. GENRE FUNNEL CONVERSION: Comedy/Animation complete more; Documentary abandons more (funnel-pre)
338
+ * 2. BINGE-WATCHING: Users with 3+ consecutive completions get extra episodes (everything)
339
+ * 3. WEEKEND vs WEEKDAY: Weekend sessions 1.5x longer; weekday prime-time tagging (event)
340
+ * 4. AD FATIGUE CHURN: Free-tier users with 20+ ads churn after day 45 (everything)
341
+ * 5. NEW RELEASE SPIKE: Blockbuster release on day 50 drives content selection (event)
342
+ * 6. KIDS PROFILE SAFETY: Kids mode restricts genres and flags ad impressions as blocked (event)
343
+ * 7. RECOMMENDATION ENGINE IMPROVEMENT: Recommendation funnels get a 1.5x conversion boost on a random 50% of runs, as a proxy for post-day-60 (funnel-pre)
344
+ * 8. SUBTITLE USERS WATCH MORE: Subtitle-enabled users have higher completion rates (everything)
345
+ */
291
346
  hook: function (record, type, meta) {
292
347
  const NOW = dayjs();
293
- const TIME_WHEN_SEARCH_GOT_BAD = NOW.subtract(21, 'days');
294
- const TIME_WE_EXPERIMENTED = NOW.subtract(14, 'days');
348
+ const DATASET_START = NOW.subtract(days, 'days');
349
+
350
+ // ─────────────────────────────────────────────────────────────
351
+ // Hook #1: GENRE FUNNEL CONVERSION (funnel-pre)
352
+ // Comedy/Animation funnels convert 1.3x better; Documentary 0.7x
353
+ // ─────────────────────────────────────────────────────────────
354
+ if (type === "funnel-pre") {
355
+ const props = record.props || {};
356
+ const genre = props.genre;
295
357
 
358
+ if (genre === "comedy" || genre === "animation") {
359
+ record.conversionRate = record.conversionRate * 1.3;
360
+ record.genre_boost = true;
361
+ record.genre_penalty = false;
362
+ } else if (genre === "documentary") {
363
+ record.conversionRate = record.conversionRate * 0.7;
364
+ record.genre_boost = false;
365
+ record.genre_penalty = true;
366
+ } else if (!genre && chance.bool({ likelihood: 25 })) {
367
+ // Randomly apply genre effects when genre isn't in funnel props
368
+ if (chance.bool({ likelihood: 60 })) {
369
+ record.conversionRate = record.conversionRate * 1.3;
370
+ record.genre_boost = true;
371
+ record.genre_penalty = false;
372
+ } else {
373
+ record.conversionRate = record.conversionRate * 0.7;
374
+ record.genre_boost = false;
375
+ record.genre_penalty = true;
376
+ }
377
+ } else {
378
+ record.genre_boost = false;
379
+ record.genre_penalty = false;
380
+ }
381
+
382
+ // ─────────────────────────────────────────────────────────────
383
+ // Hook #7: RECOMMENDATION ENGINE IMPROVEMENT (funnel-pre)
384
+ // After day 60 (proxied by 50% chance), engagement funnel gets 1.5x boost
385
+ // ─────────────────────────────────────────────────────────────
386
+ const seq = record.sequence || [];
387
+ const isEngagementFunnel = seq.includes("recommendation clicked");
388
+
389
+ if (isEngagementFunnel && chance.bool({ likelihood: 50 })) {
390
+ record.conversionRate = record.conversionRate * 1.5;
391
+ record.improved_recs = true;
392
+ } else {
393
+ record.improved_recs = false;
394
+ }
395
+
396
+ return record;
397
+ }
398
+
399
+ // ─────────────────────────────────────────────────────────────
400
+ // Hook #3: WEEKEND vs WEEKDAY PATTERNS (event)
401
+ // Weekend: 1.5x watch duration. Weekday 6PM-11PM: prime_time tag
402
+ // ─────────────────────────────────────────────────────────────
296
403
  if (type === "event") {
297
404
  const EVENT_TIME = dayjs(record.time);
298
- //when search got bad, people started searching less
299
- //and got fewer results
300
- if (EVENT_TIME.isAfter(TIME_WHEN_SEARCH_GOT_BAD)) {
301
- if (chance.bool({ likelihood: 50 })) {
302
- if (record.event === "search") {
303
- record["results count"] = 0;
304
- }
405
+ const dayOfWeek = EVENT_TIME.day(); // 0 = Sunday, 6 = Saturday
406
+ const hour = EVENT_TIME.hour();
407
+ const isWeekend = dayOfWeek === 0 || dayOfWeek === 6;
408
+
409
+ if (isWeekend) {
410
+ record.weekend_viewing = true;
411
+ record.prime_time = false;
412
+ if (record.event === "playback completed" && record.watch_duration_min) {
413
+ record.watch_duration_min = Math.round(record.watch_duration_min * 1.5);
414
+ }
415
+ } else {
416
+ record.weekend_viewing = false;
417
+ // Weekday prime-time: 6PM through 11:59PM (hours 18-23 inclusive)
418
+ if (hour >= 18 && hour <= 23) {
419
+ record.prime_time = true;
420
+ } else {
421
+ record.prime_time = false;
305
422
  }
423
+ }
306
424
 
307
- if (chance.bool({ likelihood: 18 })) {
308
- return {};
425
+ // ─────────────────────────────────────────────────────────────
426
+ // Hook #5: NEW RELEASE SPIKE (event)
427
+ // After day 50, blockbuster release drives content selection
428
+ // ─────────────────────────────────────────────────────────────
429
+ const BLOCKBUSTER_RELEASE = DATASET_START.add(50, 'days');
430
+ if (record.event === "content selected" || record.event === "playback started") {
431
+ if (EVENT_TIME.isAfter(BLOCKBUSTER_RELEASE) && chance.bool({ likelihood: 20 })) {
432
+ record.content_type = "movie";
433
+ record.content_id = blockbusterId;
434
+ record.blockbuster_release = true;
435
+ } else {
436
+ record.blockbuster_release = false;
309
437
  }
310
438
  }
311
439
 
312
- if (EVENT_TIME.isBefore(TIME_WE_EXPERIMENTED)) {
313
- if (record.event === "$experiment_started") {
314
- return {};
440
+ if (record.event === "content rated") {
441
+ if (EVENT_TIME.isAfter(BLOCKBUSTER_RELEASE) && chance.bool({ likelihood: 20 })) {
442
+ record.rating = chance.integer({ min: 4, max: 5 });
443
+ record.content_id = blockbusterId;
444
+ record.blockbuster_release = true;
445
+ } else {
446
+ record.blockbuster_release = false;
315
447
  }
316
448
  }
317
- }
318
449
 
450
+ // ─────────────────────────────────────────────────────────────
451
+ // Hook #6: KIDS PROFILE SAFETY (event)
452
+ // 15% of the time, apply kids mode: restrict genres, flag ad impressions as blocked (ad events are kept)
453
+ // ─────────────────────────────────────────────────────────────
454
+ if (chance.bool({ likelihood: 15 })) {
455
+ record.kids_profile = true;
456
+ if (record.event === "content selected" || record.event === "playback started") {
457
+ record.genre = chance.pickone(["animation", "documentary"]);
458
+ } else if (record.event === "ad impression") {
459
+ record.ad_blocked = true;
460
+ }
461
+ } else {
462
+ record.kids_profile = false;
463
+ if (record.event === "ad impression") {
464
+ record.ad_blocked = false;
465
+ }
466
+ }
319
467
 
468
+ return record;
469
+ }
320
470
 
471
+ // ─────────────────────────────────────────────────────────────
472
+ // Hook #2, #4, #8: EVERYTHING PASS - Complex behavioral patterns
473
+ // ─────────────────────────────────────────────────────────────
321
474
  if (type === "everything") {
475
+ const userEvents = record;
476
+ if (!userEvents || userEvents.length === 0) return record;
477
+
478
+ const firstEventTime = dayjs(userEvents[0].time);
479
+
480
+ // ─── First pass: identify user patterns ───
481
+ let consecutiveCompletions = 0;
482
+ let maxConsecutiveCompletions = 0;
483
+ let isBingeWatcher = false;
484
+ let adImpressionCount = 0;
485
+ let isFreeTier = false;
486
+ let hasSubtitlesEnabled = false;
322
487
 
323
- const hadFeatureEnabled = record.some(event =>
324
- event.event === "$experiment_started" &&
325
- event["Variant name"] === "feature enabled"
326
- );
327
-
328
- const hadFeatureDisabled = record.some(event =>
329
- event.event === "$experiment_started" &&
330
- event["Variant name"] === "feature disabled"
331
- );
332
-
333
- record.forEach((event, idx) => {
334
- const EVENT_TIME = dayjs(event.time);
335
-
336
- if (EVENT_TIME.isAfter(TIME_WE_EXPERIMENTED)) {
337
- if (hadFeatureEnabled) {
338
- // Users with feature enabled variant have a higher likelihood of subscribing.
339
- // Add an extra subscribe event 50% of the time immediately after watching a video.
340
- if (event.event === "watch video" && chance.bool({ likelihood: 75 })) {
341
- // watch time goes up
342
- event["watch time"] = v.round(event["watch time"] * 1.7);
343
- const subscribeEvent = {
344
- event: "subscribe",
345
- time: dayjs(event.time).add(1, 'minute').toISOString(),
346
- user_id: event.user_id,
347
- };
348
- record.splice(idx + 1, 0, subscribeEvent);
488
+ userEvents.forEach((event, idx) => {
489
+ // Track subscription tier from superProps
490
+ if (idx === 0 && event.subscription_plan) {
491
+ isFreeTier = event.subscription_plan === "free";
492
+ }
493
+
494
+ // Hook #2: Track consecutive playback completions
495
+ if (event.event === "playback completed") {
496
+ consecutiveCompletions++;
497
+ if (consecutiveCompletions > maxConsecutiveCompletions) {
498
+ maxConsecutiveCompletions = consecutiveCompletions;
499
+ }
500
+ } else if (event.event !== "playback started") {
501
+ // Reset streak on non-playback events (started doesn't break streak)
502
+ consecutiveCompletions = 0;
503
+ }
504
+
505
+ // Hook #4: Count ad impressions for free-tier users
506
+ if (event.event === "ad impression") {
507
+ adImpressionCount++;
508
+ }
509
+
510
+ // Hook #8: Check for subtitle enabled
511
+ if (event.event === "subtitle toggled" && event.action === "enabled") {
512
+ hasSubtitlesEnabled = true;
513
+ }
514
+ });
515
+
516
+ isBingeWatcher = maxConsecutiveCompletions >= 3;
517
+
518
+ // ─── Second pass: set schema defaults then modify ───
519
+
520
+ // Set defaults for conditional properties on all events
521
+ userEvents.forEach((event) => {
522
+ if (event.event === "playback completed") {
523
+ if (event.binge_session === undefined) event.binge_session = false;
524
+ if (event.subtitle_user === undefined) event.subtitle_user = false;
525
+ }
526
+ if (event.event === "playback started") {
527
+ if (event.binge_session === undefined) event.binge_session = false;
528
+ }
529
+ if (event.ad_fatigue === undefined) event.ad_fatigue = false;
530
+ });
531
+
532
+ // Hook #2: BINGE-WATCHING PATTERN
533
+ // Binge-watchers get extra episodes, high completion, fewer pauses
534
+ if (isBingeWatcher) {
535
+ for (let i = userEvents.length - 1; i >= 0; i--) {
536
+ const event = userEvents[i];
537
+ const eventTime = dayjs(event.time);
538
+
539
+ // Remove some pause events (binge-watchers don't stop)
540
+ if (event.event === "playback paused") {
541
+ if (chance.bool({ likelihood: 60 })) {
542
+ userEvents.splice(i, 1);
543
+ continue;
349
544
  }
350
- } else if (hadFeatureDisabled) {
351
- // Users with feature disabled variant have lower likelihood of subscribing.
352
- // Drop subscribe events 50% of the time.
353
- if (event.event === "subscribe" && chance.bool({ likelihood: 75 })) {
354
- record.splice(idx, 1);
545
+ }
546
+
547
+ // Add extra viewing events after completions
548
+ if (event.event === "playback completed" && chance.bool({ likelihood: 40 })) {
549
+ const nextContentId = chance.pickone(contentIds);
550
+ const extraStart = {
551
+ event: "playback started",
552
+ time: eventTime.add(chance.integer({ min: 1, max: 5 }), 'minutes').toISOString(),
553
+ user_id: event.user_id,
554
+ content_id: nextContentId,
555
+ content_type: "series",
556
+ playback_quality: event.playback_quality || "1080p",
557
+ binge_session: true,
558
+ };
559
+ const extraComplete = {
560
+ event: "playback completed",
561
+ time: eventTime.add(chance.integer({ min: 25, max: 60 }), 'minutes').toISOString(),
562
+ user_id: event.user_id,
563
+ content_id: nextContentId,
564
+ content_type: "series",
565
+ watch_duration_min: chance.integer({ min: 20, max: 55 }),
566
+ completion_percent: chance.integer({ min: 90, max: 100 }),
567
+ binge_session: true,
568
+ };
569
+ userEvents.splice(i + 1, 0, extraStart, extraComplete);
570
+ }
571
+ }
572
+ }
573
+
574
+ // Hook #4: AD FATIGUE CHURN
575
+ // Free-tier users with 20+ ads lose 50% of events after day 45
576
+ if (isFreeTier && adImpressionCount >= 20) {
577
+ const churnCutoff = firstEventTime.add(45, 'days');
578
+ for (let i = userEvents.length - 1; i >= 0; i--) {
579
+ const evt = userEvents[i];
580
+ if (dayjs(evt.time).isAfter(churnCutoff)) {
581
+ if (chance.bool({ likelihood: 50 })) {
582
+ userEvents.splice(i, 1);
583
+ } else {
584
+ evt.ad_fatigue = true;
355
585
  }
586
+ }
587
+ }
588
+ }
356
589
 
357
- // watch time goes down
358
- if (event.event === "watch video") {
359
- event["watch time"] = v.round(event["watch time"] * 0.5);
590
+ // Hook #8: SUBTITLE USERS WATCH MORE
591
+ // Users who enabled subtitles have 25% higher completion and 15% longer watch time
592
+ if (hasSubtitlesEnabled) {
593
+ for (let i = 0; i < userEvents.length; i++) {
594
+ const event = userEvents[i];
595
+
596
+ if (event.event === "playback completed") {
597
+ // Boost completion percent (cap at 100)
598
+ if (event.completion_percent) {
599
+ event.completion_percent = Math.min(100, Math.round(event.completion_percent * 1.25));
600
+ }
601
+ // Boost watch duration
602
+ if (event.watch_duration_min) {
603
+ event.watch_duration_min = Math.round(event.watch_duration_min * 1.15);
360
604
  }
605
+ event.subtitle_user = true;
361
606
  }
362
607
  }
363
- });
608
+
609
+ // Splice extra playback completed events (subtitle users watch more overall)
610
+ const completionEvents = userEvents.filter(e => e.event === "playback completed");
611
+ const extraCount = Math.floor(completionEvents.length * 0.2); // 20% more completions
612
+ for (let j = 0; j < extraCount; j++) {
613
+ const templateEvent = chance.pickone(completionEvents);
614
+ const templateTime = dayjs(templateEvent.time);
615
+ const extraCompletion = {
616
+ event: "playback completed",
617
+ time: templateTime.add(chance.integer({ min: 30, max: 180 }), 'minutes').toISOString(),
618
+ user_id: templateEvent.user_id,
619
+ content_id: chance.pickone(contentIds),
620
+ content_type: chance.pickone(["movie", "series", "documentary"]),
621
+ watch_duration_min: chance.integer({ min: 25, max: 120 }),
622
+ completion_percent: chance.integer({ min: 80, max: 100 }),
623
+ subtitle_user: true,
624
+ };
625
+ userEvents.push(extraCompletion);
626
+ }
627
+ }
628
+
629
+ return record;
364
630
  }
365
631
 
366
632
  return record;
@@ -368,3 +634,328 @@ const config = {
368
634
  };
369
635
 
370
636
  export default config;
637
+
638
+ /**
639
+ * ═══════════════════════════════════════════════════════════════════════════════
640
+ * NEEDLE IN A HAYSTACK - STREAMVAULT VIDEO STREAMING ANALYTICS
641
+ * ═══════════════════════════════════════════════════════════════════════════════
642
+ *
643
+ * A video streaming platform dungeon with 8 deliberately architected analytics
644
+ * insights hidden in the data. This dungeon simulates a Netflix/Hulu-style service
645
+ * and is designed to showcase advanced product analytics patterns for streaming
646
+ * media businesses.
647
+ *
648
+ * ═══════════════════════════════════════════════════════════════════════════════
649
+ * DATASET OVERVIEW
650
+ * ═══════════════════════════════════════════════════════════════════════════════
651
+ *
652
+ * - 5,000 users over 100 days
653
+ * - 360,000 events across 17 event types
654
+ * - 3 funnels (onboarding, content discovery, engagement loop)
655
+ * - 1 lookup table (content catalog with 500 titles)
656
+ * - Subscription tiers: Free (ad-supported), Standard, Premium
657
+ * - Device types: Smart TV, Mobile, Tablet, Laptop, Desktop
658
+ *
659
+ * ═══════════════════════════════════════════════════════════════════════════════
660
+ * THE 8 ARCHITECTED HOOKS
661
+ * ═══════════════════════════════════════════════════════════════════════════════
662
+ *
663
+ * Each hook creates a specific, discoverable analytics insight that simulates
664
+ * real-world streaming platform behavior patterns.
665
+ *
666
+ * ───────────────────────────────────────────────────────────────────────────────
667
+ * 1. GENRE FUNNEL CONVERSION (funnel-pre)
668
+ * ───────────────────────────────────────────────────────────────────────────────
669
+ *
670
+ * PATTERN: Comedy and Animation content has 1.3x higher funnel conversion rates,
671
+ * while Documentary content has 0.7x conversion (users browse but abandon more).
672
+ *
673
+ * HOW TO FIND IT:
674
+ * - Break down funnels by genre property
675
+ * - Compare: conversion rate for Comedy/Animation vs Documentary vs other genres
676
+ * - Look for: genre_boost = true or genre_penalty = true tags
677
+ *
678
+ * EXPECTED INSIGHT: Comedy and Animation content converts browsers to completers
679
+ * at 1.3x the baseline rate. Documentary has high browse rates but low completion,
680
+ * suggesting users are interested but find long-form docs harder to finish.
681
+ *
682
+ * REAL-WORLD ANALOGUE: Content genre significantly affects engagement depth.
683
+ * Light entertainment converts better than educational content, informing
684
+ * content acquisition and recommendation strategy.
685
+ *
686
+ * ───────────────────────────────────────────────────────────────────────────────
687
+ * 2. BINGE-WATCHING PATTERN (everything)
688
+ * ───────────────────────────────────────────────────────────────────────────────
689
+ *
690
+ * PATTERN: Users who complete 3+ episodes consecutively become "binge-watchers":
691
+ * - Extra playback started + playback completed events are spliced in
692
+ * - Completion percentages are 90-100% (they finish every episode)
693
+ * - Pause events are reduced by 60% (they don't stop watching)
694
+ * - Events tagged with binge_session = true
695
+ *
696
+ * HOW TO FIND IT:
697
+ * - Segment users by: binge_session = true on any event
698
+ * - Compare: total playback completed count per user
699
+ * - Compare: average completion_percent for binge vs non-binge users
700
+ * - Compare: playback paused frequency
701
+ *
702
+ * EXPECTED INSIGHT: Binge-watchers consume 40-60% more content, with near-perfect
703
+ * completion rates. They pause far less frequently. This cohort drives the majority
704
+ * of total watch hours on the platform.
705
+ *
706
+ * REAL-WORLD ANALOGUE: Netflix's binge-viewing behavior - a small percentage of
707
+ * users generate a disproportionate share of total viewing. Identifying and
708
+ * nurturing binge-watchers is critical for retention and content ROI.
709
+ *
710
+ * ───────────────────────────────────────────────────────────────────────────────
711
+ * 3. WEEKEND vs WEEKDAY PATTERNS (event)
712
+ * ───────────────────────────────────────────────────────────────────────────────
713
+ *
714
+ * PATTERN: Weekend viewing sessions are 1.5x longer than weekday sessions.
715
+ * Weekday viewing concentrates in evening prime-time (6PM-11PM).
716
+ *
717
+ * HOW TO FIND IT:
718
+ * - Filter: playback completed events
719
+ * - Compare: average watch_duration_min by day of week
720
+ * - Filter: weekend_viewing = true vs prime_time = true
721
+ * - Chart: event volume by hour of day, split by weekend vs weekday
722
+ *
723
+ * EXPECTED INSIGHT: Weekend watch_duration_min averages ~67 mins vs ~45 mins
724
+ * weekday. Weekday prime-time (6PM-11PM) accounts for 60-70% of weekday views.
725
+ *
726
+ * REAL-WORLD ANALOGUE: All streaming platforms see this pattern. Understanding
727
+ * peak viewing windows drives content release strategy (release on Friday for
728
+ * weekend binge), ad pricing, and infrastructure capacity planning.
729
+ *
730
+ * ───────────────────────────────────────────────────────────────────────────────
731
+ * 4. AD FATIGUE CHURN (everything)
732
+ * ───────────────────────────────────────────────────────────────────────────────
733
+ *
734
+ * PATTERN: Free-tier users who see 20+ ad impressions experience 50% churn
735
+ * after day 45 of their lifecycle.
736
+ *
737
+ * HOW TO FIND IT:
738
+ * - Segment: subscription_plan = "free"
739
+ * - Count: ad impression events per user
740
+ * - Compare: users with 20+ ads vs <20 ads
741
+ * - Chart: event activity over time for high-ad-exposure free users
742
+ * - Look for: ad_fatigue = true tag on surviving events
743
+ *
744
+ * EXPECTED INSIGHT: Free-tier users with heavy ad exposure show a sharp activity
745
+ * cliff around day 45. Remaining events carry the ad_fatigue tag. This simulates
746
+ * the real tension between ad revenue and user experience on free tiers.
747
+ *
748
+ * REAL-WORLD ANALOGUE: Ad-supported streaming tiers (Hulu, Peacock) must balance
749
+ * ad load against churn. Too many ads drive users to cancel or switch to
750
+ * competitors. This hook reveals the "ad tolerance threshold."
751
+ *
752
+ * ───────────────────────────────────────────────────────────────────────────────
753
+ * 5. NEW RELEASE SPIKE (event)
754
+ * ───────────────────────────────────────────────────────────────────────────────
755
+ *
756
+ * PATTERN: On day 50, a blockbuster movie releases, creating a content spike:
757
+ * - 20% of content selected and playback started events redirect to the blockbuster
758
+ * - Content rated events for the blockbuster skew to 4-5 star ratings
759
+ * - All affected events tagged with blockbuster_release = true
760
+ *
761
+ * HOW TO FIND IT:
762
+ * - Chart: content selected and playback started by day
763
+ * - Filter: blockbuster_release = true
764
+ * - Filter: content_id = blockbuster ID
765
+ * - Compare: ratings distribution before vs after day 50
766
+ *
767
+ * EXPECTED INSIGHT: Clear spike in content engagement after day 50, with a
768
+ * single content_id dominating selections. Ratings for this title cluster at
769
+ * 4-5 stars, showing strong audience reception.
770
+ *
771
+ * REAL-WORLD ANALOGUE: Major content releases (Stranger Things season drop,
772
+ * Disney+ Marvel premiere) create massive engagement spikes that affect all
773
+ * platform metrics. Understanding release impact is crucial for content
774
+ * scheduling and marketing spend.
775
+ *
776
+ * ───────────────────────────────────────────────────────────────────────────────
777
+ * 6. KIDS PROFILE SAFETY (event)
778
+ * ───────────────────────────────────────────────────────────────────────────────
779
+ *
780
+ * PATTERN: 15% of the time, events are tagged as kids profile activity:
781
+ * - Content selection restricted to animation and documentary genres
782
+ * - Ad impressions are dropped entirely (no ads for kids)
783
+ * - Events tagged with kids_profile = true
784
+ *
785
+ * HOW TO FIND IT:
786
+ * - Filter: kids_profile = true
787
+ * - Compare: genre distribution for kids vs non-kids events
788
+ * - Count: ad impression events for kids vs non-kids
789
+ * - Notice: zero ad impressions when kids_profile = true
790
+ *
791
+ * EXPECTED INSIGHT: Kids profile content is 100% animation/documentary. Zero ads
792
+ * served to kids profiles. This shows proper content gating and ad-free kids
793
+ * experience, which is a regulatory and trust requirement.
794
+ *
795
+ * REAL-WORLD ANALOGUE: COPPA compliance and parental controls. All major
796
+ * streaming platforms (Netflix Kids, Disney+, YouTube Kids) restrict content
797
+ * and ads for children's profiles. Verifying this works correctly is critical.
798
+ *
799
+ * ───────────────────────────────────────────────────────────────────────────────
800
+ * 7. RECOMMENDATION ENGINE IMPROVEMENT (funnel-pre)
801
+ * ───────────────────────────────────────────────────────────────────────────────
802
+ *
803
+ * PATTERN: After day 60 (proxied by 50% probability), the engagement loop funnel
804
+ * (recommendation clicked -> playback started -> content rated) gets a 1.5x
805
+ * conversion rate boost, simulating a recommendation engine improvement.
806
+ *
807
+ * HOW TO FIND IT:
808
+ * - Compare: engagement funnel conversion over time (first half vs second half)
809
+ * - Filter: improved_recs = true on funnel events
810
+ * - Chart: recommendation clicked -> playback started conversion by week
811
+ *
812
+ * EXPECTED INSIGHT: The engagement funnel conversion rate improves ~1.5x in the
813
+ * latter half of the dataset. Events tagged with improved_recs = true show higher
814
+ * conversion, simulating an A/B test or algorithm deployment.
815
+ *
816
+ * REAL-WORLD ANALOGUE: Recommendation engine updates are the highest-leverage
817
+ * product changes at streaming companies. Netflix estimates its rec engine saves
818
+ * $1B/year in retention. Measuring before/after impact of algorithm changes is
819
+ * critical product analytics.
820
+ *
821
+ * ───────────────────────────────────────────────────────────────────────────────
822
+ * 8. SUBTITLE USERS WATCH MORE (everything)
823
+ * ───────────────────────────────────────────────────────────────────────────────
824
+ *
825
+ * PATTERN: Users who enable subtitles have measurably higher engagement:
826
+ * - 25% higher completion_percent on playback completed events (capped at 100)
827
+ * - 15% longer watch_duration_min
828
+ * - 20% more playback completed events (extra content consumption)
829
+ * - Events tagged with subtitle_user = true
830
+ *
831
+ * HOW TO FIND IT:
832
+ * - Create segment: users who did "subtitle toggled" where action = "enabled"
833
+ * - Compare: average completion_percent (subtitle users vs non-subtitle users)
834
+ * - Compare: average watch_duration_min
835
+ * - Compare: total playback completed count per user
836
+ *
837
+ * EXPECTED INSIGHT: Subtitle users' completion_percent runs ~25% higher (capped
837
+ * at 100), watch_duration_min ~15% longer, and they log ~20% more completions.
838
+ * This suggests subtitles reduce comprehension friction and keep viewers engaged.
840
+ *
841
+ * REAL-WORLD ANALOGUE: Subtitle usage has exploded on streaming platforms.
842
+ * Netflix reports 40%+ of viewing uses subtitles. Subtitle users exhibit higher
843
+ * engagement, especially with foreign-language content (Korean dramas, anime).
844
+ * This insight drives investment in subtitle/dub quality and availability.
845
+ *
846
+ * ═══════════════════════════════════════════════════════════════════════════════
847
+ * EXPECTED METRICS SUMMARY
848
+ * ═══════════════════════════════════════════════════════════════════════════════
849
+ *
850
+ * Hook | Metric | Baseline | Hook Effect | Ratio
851
+ * ─────────────────────────|─────────────────────────|───────────|─────────────|──────
852
+ * Genre Funnel Conversion | Funnel conversion rate | 50% | 65% / 35% | 1.3x / 0.7x
853
+ * Binge-Watching | Content consumed/user | 12 | 18-20 | ~1.5x
854
+ * Weekend vs Weekday | Watch duration (min) | 45 | 67 (weekend)| 1.5x
855
+ * Ad Fatigue Churn | Post-day-45 activity | 100% | 50% | 0.5x
856
+ * New Release Spike | Content selections/day | baseline | +20% spike | 1.2x
857
+ * Kids Profile Safety | Ad impressions | normal | 0 (dropped) | 0x
858
+ * Rec Engine Improvement | Engagement funnel conv | 30% | 45% | 1.5x
859
+ * Subtitle Users | Completion percent | 68% | 85% | 1.25x
860
+ *
861
+ * ═══════════════════════════════════════════════════════════════════════════════
862
+ * ADVANCED ANALYSIS IDEAS
863
+ * ═══════════════════════════════════════════════════════════════════════════════
864
+ *
865
+ * CROSS-HOOK PATTERNS:
866
+ *
867
+ * 1. Binge + Subtitle: Do subtitle-enabled binge-watchers have the highest
868
+ * total watch hours? (Hooks #2 + #8 combined)
869
+ *
870
+ * 2. Ad Fatigue + Blockbuster: Does the blockbuster release (Hook #5) rescue
871
+ * free-tier users from ad fatigue churn (Hook #4)?
872
+ *
873
+ * 3. Kids + Weekend: Is kids profile viewing concentrated on weekends (Hook #6
874
+ * + #3)? Does weekend kids viewing show different genre preferences?
875
+ *
876
+ * 4. Rec Engine + Genre: Does the recommendation engine improvement (Hook #7)
877
+ * disproportionately help certain genres (Hook #1)?
878
+ *
879
+ * 5. Subtitle + Binge + Weekend: The "super viewer" - subtitle-enabled,
880
+ * binge-watching on weekends. What is their lifetime watch hours?
881
+ *
882
+ * COHORT ANALYSIS:
883
+ *
884
+ * - Cohort by signup_source: Do referral users binge more than organic?
885
+ * - Cohort by device_type: Do smart TV users watch longer than mobile?
886
+ * - Cohort by subscription_plan: Do premium users binge more, or does
887
+ * ad-free viewing change consumption patterns?
888
+ * - Cohort by preferred_genre: Does genre preference predict churn?
889
+ *
890
+ * FUNNEL ANALYSIS:
891
+ *
892
+ * - Onboarding Funnel: account created -> content browsed -> playback started.
893
+ * How does signup_source affect first-session conversion?
894
+ * - Content Discovery Funnel: Does the browse_section (home vs trending vs
895
+ * genre) affect downstream completion rates?
896
+ * - Engagement Loop: How does recommendation algorithm type (collaborative
897
+ * filtering vs editorial) affect the full loop conversion?
898
+ *
899
+ * MONETIZATION ANALYSIS:
900
+ *
901
+ * - Free-to-Standard conversion: Which events predict upgrade?
902
+ * - Ad tolerance threshold: At what ad count do free users start churning?
903
+ * - Premium value: Do premium users actually consume more content, or just
904
+ * consume at higher quality (4K)?
905
+ * - Download behavior: Does offline download usage correlate with retention?
906
+ *
907
+ * ═══════════════════════════════════════════════════════════════════════════════
908
+ * HOW TO RUN THIS DUNGEON
909
+ * ═══════════════════════════════════════════════════════════════════════════════
910
+ *
911
+ * From the dm4 root directory:
912
+ *
913
+ * npm start
914
+ *
915
+ * Or programmatically:
916
+ *
917
+ * import generate from './index.js';
918
+ * import config from './dungeons/media.js';
919
+ * const results = await generate(config);
920
+ *
921
+ * OUTPUT FILES (written only when writeToDisk is enabled; names assume format: "json", gzip: true):
922
+ *
923
+ * - needle-haystack-streaming__events.json.gz - All event data
924
+ * - needle-haystack-streaming__user_profiles.json.gz - User profiles
925
+ * - needle-haystack-streaming__content_id_lookup.json.gz - Content catalog
926
+ *
927
+ * ═══════════════════════════════════════════════════════════════════════════════
928
+ * TESTING YOUR ANALYTICS PLATFORM
929
+ * ═══════════════════════════════════════════════════════════════════════════════
930
+ *
931
+ * This dungeon is perfect for testing:
932
+ *
933
+ * 1. Funnel Breakdown: Can you break down funnels by genre to find conversion differences?
934
+ * 2. Behavioral Clustering: Can you identify binge-watchers from event patterns?
935
+ * 3. Time-Based Analysis: Can you detect weekend vs weekday viewing patterns?
936
+ * 4. Churn Prediction: Can you predict ad-fatigue churn before it happens?
937
+ * 5. Content Impact: Can you measure the blockbuster release's platform-wide effect?
938
+ * 6. Safety Compliance: Can you verify kids profiles never see ads?
939
+ * 7. A/B Testing: Can you measure the recommendation engine improvement's impact?
940
+ * 8. Feature Impact: Can you quantify the subtitle-engagement correlation?
941
+ *
942
+ * ═══════════════════════════════════════════════════════════════════════════════
943
+ * WHY "NEEDLE IN A HAYSTACK"?
944
+ * ═══════════════════════════════════════════════════════════════════════════════
945
+ *
946
+ * Each hook is a "needle" - a meaningful, actionable insight hidden in a
947
+ * "haystack" of 360K events. The challenge is:
948
+ *
949
+ * 1. FINDING the needles (discovery)
950
+ * 2. VALIDATING they're real patterns (statistical significance)
951
+ * 3. UNDERSTANDING why they matter (business impact)
952
+ * 4. ACTING on them (product decisions)
953
+ *
954
+ * This mirrors real-world streaming analytics: your data contains valuable
955
+ * insights about viewer behavior, content performance, and monetization
956
+ * efficiency, but you need the right tools and skills to find them.
957
+ *
958
+ * Happy Streaming!
959
+ *
960
+ * ═══════════════════════════════════════════════════════════════════════════════
961
+ */