make-mp-data 2.1.11 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +31 -0
  2. package/dungeons/adspend.js +35 -1
  3. package/dungeons/anon.js +25 -1
  4. package/dungeons/array-of-object-lookup.js +201 -0
  5. package/dungeons/benchmark-heavy.js +241 -0
  6. package/dungeons/benchmark-light.js +141 -0
  7. package/dungeons/big.js +10 -9
  8. package/dungeons/business.js +60 -12
  9. package/dungeons/complex.js +35 -1
  10. package/dungeons/copilot.js +383 -0
  11. package/dungeons/education.js +1005 -0
  12. package/dungeons/experiments.js +18 -4
  13. package/dungeons/fintech.js +976 -0
  14. package/dungeons/foobar.js +32 -0
  15. package/dungeons/food.js +988 -0
  16. package/dungeons/funnels.js +38 -1
  17. package/dungeons/gaming.js +26 -5
  18. package/dungeons/media.js +861 -270
  19. package/dungeons/mil.js +31 -3
  20. package/dungeons/mirror.js +33 -1
  21. package/dungeons/retention-cadence.js +211 -0
  22. package/dungeons/rpg.js +1178 -0
  23. package/dungeons/sanity.js +32 -2
  24. package/dungeons/sass.js +923 -0
  25. package/dungeons/scd.js +47 -1
  26. package/dungeons/simple.js +29 -14
  27. package/dungeons/social.js +928 -0
  28. package/dungeons/streaming.js +373 -0
  29. package/dungeons/strict-event-test.js +30 -0
  30. package/dungeons/student-teacher.js +19 -5
  31. package/dungeons/text-generation.js +120 -84
  32. package/dungeons/too-big-events.js +203 -0
  33. package/dungeons/{userAgent.js → user-agent.js} +23 -2
  34. package/entry.js +5 -4
  35. package/index.js +41 -54
  36. package/lib/core/config-validator.js +122 -7
  37. package/lib/core/context.js +7 -14
  38. package/lib/core/storage.js +57 -25
  39. package/lib/generators/adspend.js +12 -12
  40. package/lib/generators/events.js +6 -5
  41. package/lib/generators/funnels.js +32 -10
  42. package/lib/generators/product-lookup.js +262 -0
  43. package/lib/generators/product-names.js +195 -0
  44. package/lib/generators/profiles.js +3 -3
  45. package/lib/generators/scd.js +13 -3
  46. package/lib/generators/text.js +17 -4
  47. package/lib/orchestrators/mixpanel-sender.js +244 -204
  48. package/lib/orchestrators/user-loop.js +54 -16
  49. package/lib/templates/phrases.js +473 -16
  50. package/lib/templates/schema.d.ts +173 -0
  51. package/lib/templates/verbose-schema.js +140 -206
  52. package/lib/utils/chart.js +210 -0
  53. package/lib/utils/function-registry.js +285 -0
  54. package/lib/utils/json-evaluator.js +172 -0
  55. package/lib/utils/logger.js +34 -0
  56. package/lib/utils/utils.js +41 -4
  57. package/package.json +12 -21
  58. package/types.d.ts +15 -5
  59. package/dungeons/ai-chat-analytics-ed.js +0 -274
  60. package/dungeons/money2020-ed-also.js +0 -277
  61. package/dungeons/money2020-ed.js +0 -579
  62. package/lib/generators/text-bak-old.js +0 -1121
  63. package/lib/orchestrators/worker-manager.js +0 -203
  64. package/lib/templates/hooks-instructions.txt +0 -434
  65. package/lib/templates/phrases-bak.js +0 -925
  66. package/lib/templates/prompt (old).txt +0 -98
  67. package/lib/templates/schema-instructions.txt +0 -155
  68. package/lib/templates/scratch-dungeon-template.js +0 -116
  69. package/lib/templates/textQuickTest.js +0 -172
  70. package/lib/utils/ai.js +0 -120
  71. package/lib/utils/project.js +0 -166
@@ -0,0 +1,928 @@
1
+ import dayjs from "dayjs";
2
+ import utc from "dayjs/plugin/utc.js";
3
+ import "dotenv/config";
4
+ import * as u from "../lib/utils/utils.js";
5
+ import * as v from "ak-tools";
6
+
7
+ const SEED = "harness-social"; // seed string: passed to u.initChance below and reused as config.seed
8
+ dayjs.extend(utc); // registers the imported UTC plugin on dayjs
9
+ const chance = u.initChance(SEED); // chance instance built from SEED; used by groupProps and by the hook
10
+ const num_users = 5_000; // feeds config.numUsers and scales config.numEvents (num_users * 120)
11
+ const days = 100; // feeds config.numDays and the hook's dataset-start offset
12
+
13
+ /** @typedef {import("../../types.js").Dungeon} Config */
14
+
15
+ /**
16
+ * NEEDLE IN A HAYSTACK - SOCIAL MEDIA APP DESIGN
17
+ *
18
+ * Chirp - A Twitter+Instagram-style social media platform with algorithmic feed,
19
+ * creator monetization, communities, and direct messaging.
20
+ *
21
+ * CORE LOOP:
22
+ * Users sign up, build a profile, follow people, consume content in their feed,
23
+ * create their own posts/stories, and engage via likes, shares, and comments.
24
+ * Power users become "creators" with subscriber tiers. Monetization through
25
+ * native ads woven into feed and story placements.
26
+ *
27
+ * CONTENT CREATION (events: post created, story created):
28
+ * - Five post types: text (classic tweet), image, video, poll, link
29
+ * - Character count up to 280, optional media, hashtags for discoverability
30
+ * - Stories are ephemeral (photo, video, text) with filters and stickers
31
+ * - Creators post 3x more frequently once they start monetizing
32
+ *
33
+ * CONTENT CONSUMPTION (events: post viewed, story viewed):
34
+ * - Algorithmic feed is the primary discovery surface (pre day-45)
35
+ * - Explore tab surfaces trending and personalized content
36
+ * - Search allows finding users, hashtags, and specific posts
37
+ * - View duration is a key quality signal - engagement bait gets short views
38
+ *
39
+ * SOCIAL GRAPH (events: user followed, user unfollowed):
40
+ * - Follow/unfollow mechanics drive the social network
41
+ * - Users who receive 5+ follows become prolific creators (follow-back snowball)
42
+ * - Discovery sources: suggested, search, post interactions, mutual connections
43
+ * - Unfollows track reasons to understand content quality issues
44
+ *
45
+ * ENGAGEMENT (events: post liked, post shared, comment posted):
46
+ * - Likes are lightweight engagement (highest volume after views)
47
+ * - Shares amplify reach via repost, DM, external, or copy link
48
+ * - Comments drive conversation with mentions and threaded replies
49
+ * - Viral creators (5% of users) generate 10-20x engagement cascades
50
+ *
51
+ * MESSAGING (event: dm sent):
52
+ * - Direct messages support text, image, voice, and link content
53
+ * - Conversation threads build over time
54
+ * - DMs are a key sharing destination for content
55
+ *
56
+ * NOTIFICATIONS (event: notification received):
57
+ * - Five types: like, follow, comment, mention, trending
58
+ * - Trending notifications drive re-engagement after day 30
59
+ * - Click-through rates vary by notification type
60
+ *
61
+ * DISCOVERY & SEARCH (event: search performed):
62
+ * - Three search types: users, hashtags, posts
63
+ * - Results count varies; empty results indicate content gaps
64
+ * - Search is a secondary discovery surface behind feed/explore
65
+ *
66
+ * ADVERTISING (events: ad viewed, ad clicked):
67
+ * - Four ad formats: feed native, story, banner, video
68
+ * - Five ad categories: retail, tech, food, finance, entertainment
69
+ * - View duration and click-through tracked for ad effectiveness
70
+ *
71
+ * CREATOR ECONOMY (event: creator subscription started):
72
+ * - Three tiers: basic ($4.99), premium ($9.99), vip ($19.99)
73
+ * - Subscribers unlock exclusive content from creators
74
+ * - Monetized creators post 3x more (they have financial incentive)
75
+ *
76
+ * MODERATION (event: report submitted):
77
+ * - Users report spam, harassment, misinformation, hate speech
78
+ * - Content types: posts, comments, users, DMs
79
+ * - Users who report 3+ times show 60% churn (toxic environment drives them out)
80
+ *
81
+ * COMMUNITIES (group: community_id):
82
+ * - 100 communities with categories (tech, entertainment, sports, etc.)
83
+ * - Communities aggregate posts, comments, likes, and shares
84
+ * - Moderated vs unmoderated communities behave differently
85
+ *
86
+ * WHY THESE EVENTS/PROPERTIES?
87
+ * - Events model a complete social media loop: signup -> engagement -> creation -> monetization
88
+ * - Properties enable cohort analysis: account type, content niche, verification status
89
+ * - Funnels reveal friction: onboarding drop-off, content-to-engagement conversion
90
+ * - The algorithm change (day 45) creates a clear before/after for A/B analysis
91
+ * - Viral cascades and follow-back snowballs simulate real network effects
92
+ * - Weekend surges create visible temporal patterns in content creation
93
+ * - The "needle in haystack" hooks simulate real product insights hidden in production data
94
+ */
95
+
96
// Pre-generated post identifiers of the form `post_<uid>`, presumably meant to back a posts lookup table.
// NOTE(review): nothing visible in this file reads `postIds` (config.lookupTables is empty) — confirm it is still needed.
const postIds = v.range(1, 1001).map(() => `post_${v.uid(8)}`);
98
+
99
/**
 * Boolean flags the hooks may attach to an event, keyed by event name, together
 * with the value an event carries when the corresponding pattern did not fire.
 * Keeping them in one table guarantees generated and injected events of the same
 * type expose the same property set.
 */
const HOOK_FLAG_DEFAULTS = Object.freeze({
	"post created": { monetized_creator: false, follow_back_effect: false },
	"story created": { monetized_creator: false, follow_back_effect: false },
	"post viewed": {
		viral_cascade: false,
		monetized_creator: false,
		engagement_bait: false,
		trending_reengagement: false,
	},
	"post liked": { viral_cascade: false },
	"post shared": { viral_cascade: false },
	"comment posted": { follow_back_effect: false },
});

/** Cached time anchors; populated on the first hook invocation. */
let hookAnchors = null;

/**
 * Returns the cutoffs the hooks compare event times against.
 * They are derived from the wall clock once and then reused, so every event in a
 * run is judged against the same boundaries and the hook does not rebuild four
 * dayjs objects per call.
 *
 * @returns {{ algorithmChange: import("dayjs").Dayjs, reengagementStart: import("dayjs").Dayjs }}
 */
function getHookAnchors() {
	if (hookAnchors === null) {
		const datasetStart = dayjs().subtract(days, "days");
		hookAnchors = {
			algorithmChange: datasetStart.add(45, "days"),
			reengagementStart: datasetStart.add(30, "days"),
		};
	}
	return hookAnchors;
}

/**
 * Tells whether a timestamp falls on a Saturday or Sunday (dayjs local time).
 *
 * @param {string | number | Date} time - anything dayjs can parse
 * @returns {boolean}
 */
function isWeekend(time) {
	const dayOfWeek = dayjs(time).day(); // 0 = Sunday, 6 = Saturday
	return dayOfWeek === 0 || dayOfWeek === 6;
}

/**
 * Fills in every hook flag that is still undefined on an event, mutating it in place.
 * Flags that are already set (true or false) are left untouched.
 *
 * @param {object} event - an event record with at least `event` and `time`
 * @returns {object} the same event object, for chaining
 */
function fillHookFlagDefaults(event) {
	const defaults = HOOK_FLAG_DEFAULTS[event.event] ?? {};
	for (const [flag, value] of Object.entries(defaults)) {
		if (event[flag] === undefined) event[flag] = value;
	}
	const isContentCreation = event.event === "post created" || event.event === "story created";
	if (isContentCreation && event.weekend_surge === undefined) {
		// Events passing through the "event" hook already carry this; injected ones do not.
		event.weekend_surge = isWeekend(event.time);
	}
	if (event.toxic_user === undefined) event.toxic_user = false;
	return event;
}

/**
 * Per-event hooks (#3, #4, #5 and the tagging half of #8). Mutates `record` in place.
 *
 * @param {object} record - a single event
 * @param {ReturnType<typeof getHookAnchors>} anchors - shared time cutoffs
 */
function applyEventHooks(record, anchors) {
	if (record.event === "post viewed") {
		const eventTime = dayjs(record.time);

		// Hook #3: ALGORITHM CHANGE - a 70% draw forces the dominant source:
		// "feed" up to the day-45 cutoff, "explore" after it.
		const dominantSource = eventTime.isAfter(anchors.algorithmChange) ? "explore" : "feed";
		if (chance.bool({ likelihood: 70 })) {
			record.source = dominantSource;
		}

		// Hook #4: ENGAGEMENT BAIT - 20% of views are flagged and given a 1-5 second duration.
		// The flag is drawn at random; it does not look at any post's hashtag_count.
		if (chance.bool({ likelihood: 20 })) {
			record.view_duration_sec = chance.integer({ min: 1, max: 5 });
			record.engagement_bait = true;
		} else {
			record.engagement_bait = false;
		}

		// Hook #5: NOTIFICATION RE-ENGAGEMENT - after the day-30 cutoff, 30% of views are
		// re-attributed to "notification". This runs last, so it overrides hook #3's source.
		if (eventTime.isAfter(anchors.reengagementStart) && chance.bool({ likelihood: 30 })) {
			record.source = "notification";
			record.trending_reengagement = true;
		} else {
			record.trending_reengagement = false;
		}
	}

	// Hook #8: WEEKEND CONTENT SURGE - tag only; duplication happens in the "everything" pass.
	if (record.event === "post created" || record.event === "story created") {
		record.weekend_surge = isWeekend(record.time);
	}
}

/**
 * Per-user hooks (#1, #2, #6, #7 and the duplication half of #8).
 * Mutates `userEvents` in place: events are injected directly after the event that
 * triggered them and, for hook #7, removed. The array is NOT re-sorted by time.
 *
 * @param {object[]} userEvents - all events of one user
 * @param {ReturnType<typeof getHookAnchors>} anchors - shared time cutoffs
 */
function applyUserHooks(userEvents, anchors) {
	// First pass: tally the signals that decide which patterns apply to this user.
	let postCreatedCount = 0;
	let followCount = 0; // "user followed" events found in this user's own stream
	let reportCount = 0;
	let hasCreatorSubscription = false;
	for (const { event: name } of userEvents) {
		if (name === "post created") postCreatedCount++;
		else if (name === "user followed") followCount++;
		else if (name === "report submitted") reportCount++;
		else if (name === "creator subscription started") hasCreatorSubscription = true;
	}

	// Hook #1: only users with 10+ posts take the 5% draw.
	const isViralCreator = postCreatedCount >= 10 && chance.bool({ likelihood: 5 });
	const isSnowballCreator = followCount >= 5;

	// Second pass, walking backwards so that events spliced in at idx + 1 are never revisited.
	for (let idx = userEvents.length - 1; idx >= 0; idx--) {
		const event = fillHookFlagDefaults(userEvents[idx]);
		const eventTime = dayjs(event.time);
		const timeAfter = (min, max, unit) =>
			eventTime.add(chance.integer({ min, max }), unit).toISOString();

		// Hook #1: VIRAL CONTENT CASCADE - 10-20 extra views, likes and shares per post.
		if (isViralCreator && event.event === "post created") {
			const viralViews = chance.integer({ min: 10, max: 20 });
			const viralLikes = chance.integer({ min: 10, max: 20 });
			const viralShares = chance.integer({ min: 10, max: 20 });
			const cascade = [];

			for (let i = 0; i < viralViews; i++) {
				cascade.push(fillHookFlagDefaults({
					event: "post viewed",
					time: timeAfter(1, 180, "minutes"),
					user_id: event.user_id,
					post_type: event.post_type || "text",
					source: chance.pickone(["feed", "explore", "search"]),
					view_duration_sec: chance.integer({ min: 5, max: 90 }),
					viral_cascade: true,
				}));
			}
			for (let i = 0; i < viralLikes; i++) {
				cascade.push(fillHookFlagDefaults({
					event: "post liked",
					time: timeAfter(2, 240, "minutes"),
					user_id: event.user_id,
					post_type: event.post_type || "text",
					viral_cascade: true,
				}));
			}
			for (let i = 0; i < viralShares; i++) {
				cascade.push(fillHookFlagDefaults({
					event: "post shared",
					time: timeAfter(5, 300, "minutes"),
					user_id: event.user_id,
					share_destination: chance.pickone(["repost", "dm", "external", "copy_link"]),
					viral_cascade: true,
				}));
			}

			userEvents.splice(idx + 1, 0, ...cascade);
		}

		// Hook #2: FOLLOW-BACK SNOWBALL - half of the posts gain a sibling post and a comment.
		if (isSnowballCreator && event.event === "post created" && chance.bool({ likelihood: 50 })) {
			const duplicatePost = fillHookFlagDefaults({
				event: "post created",
				time: timeAfter(30, 240, "minutes"),
				user_id: event.user_id,
				post_type: chance.pickone(["text", "image", "video"]),
				character_count: chance.integer({ min: 10, max: 280 }),
				has_media: chance.bool({ likelihood: 60 }),
				hashtag_count: chance.integer({ min: 0, max: 5 }),
				follow_back_effect: true,
			});
			const extraComment = fillHookFlagDefaults({
				event: "comment posted",
				time: timeAfter(10, 120, "minutes"),
				user_id: event.user_id,
				comment_length: chance.integer({ min: 5, max: 200 }),
				has_mention: chance.bool({ likelihood: 40 }),
				follow_back_effect: true,
			});
			userEvents.splice(idx + 1, 0, duplicatePost, extraComment);
		}

		// Hook #6: CREATOR MONETIZATION - two extra posts/stories per original (3x overall).
		if (hasCreatorSubscription && event.event === "post created") {
			for (let i = 0; i < 2; i++) {
				const extraPost = fillHookFlagDefaults({
					event: "post created",
					time: timeAfter(1, 12, "hours"),
					user_id: event.user_id,
					post_type: chance.pickone(["text", "image", "video", "link"]),
					character_count: chance.integer({ min: 20, max: 280 }),
					has_media: chance.bool({ likelihood: 70 }),
					hashtag_count: chance.integer({ min: 1, max: 8 }),
					monetized_creator: true,
				});
				userEvents.splice(idx + 1, 0, extraPost);
			}
		}
		if (hasCreatorSubscription && event.event === "story created") {
			for (let i = 0; i < 2; i++) {
				const extraStory = fillHookFlagDefaults({
					event: "story created",
					time: timeAfter(1, 8, "hours"),
					user_id: event.user_id,
					story_type: chance.pickone(["photo", "video", "text"]),
					has_filter: chance.bool({ likelihood: 60 }),
					has_sticker: chance.bool({ likelihood: 40 }),
					monetized_creator: true,
				});
				userEvents.splice(idx + 1, 0, extraStory);
			}
		}
		// Monetized creators also revisit their own posts: 25% of views gain a "profile" view.
		if (hasCreatorSubscription && event.event === "post viewed" && chance.bool({ likelihood: 25 })) {
			const analyticsView = fillHookFlagDefaults({
				event: "post viewed",
				time: timeAfter(1, 30, "minutes"),
				user_id: event.user_id,
				post_type: event.post_type || "text",
				source: "profile",
				view_duration_sec: chance.integer({ min: 10, max: 60 }),
				monetized_creator: true,
			});
			userEvents.splice(idx + 1, 0, analyticsView);
		}
	}

	// Hook #8: WEEKEND CONTENT SURGE - 30% of weekend-tagged content gets a copy 1-3 hours later.
	// Copies are marked weekend_duplicate so they can never be copied again.
	for (let idx = userEvents.length - 1; idx >= 0; idx--) {
		const event = userEvents[idx];
		if (event.weekend_surge && !event.weekend_duplicate && chance.bool({ likelihood: 30 })) {
			const copy = {
				...event,
				time: dayjs(event.time).add(chance.integer({ min: 1, max: 3 }), "hours").toISOString(),
				weekend_duplicate: true,
			};
			userEvents.splice(idx + 1, 0, copy);
		}
	}

	// Hook #7: TOXICITY CHURN - users with 3+ reports lose 60% of their events after the
	// day-30 cutoff; the surviving late events are flagged toxic_user.
	if (reportCount >= 3) {
		for (let i = userEvents.length - 1; i >= 0; i--) {
			const evt = userEvents[i];
			if (!dayjs(evt.time).isAfter(anchors.reengagementStart)) continue;
			if (chance.bool({ likelihood: 60 })) {
				userEvents.splice(i, 1);
			} else {
				evt.toxic_user = true;
			}
		}
	}
}

/** @type {Config} */
const config = {
	token: "",
	seed: SEED,
	numDays: days,
	numEvents: num_users * 120,
	numUsers: num_users,
	hasAnonIds: false,
	hasSessionIds: true,
	format: "json",
	gzip: true,
	alsoInferFunnels: false,
	hasLocation: true,
	hasAndroidDevices: true,
	hasIOSDevices: true,
	hasDesktopDevices: true,
	hasBrowser: false,
	hasCampaigns: false,
	isAnonymous: false,
	hasAdSpend: false,
	percentUsersBornInDataset: 50,
	hasAvatar: true,
	makeChart: false,
	batchSize: 2_500_000,
	concurrency: 10,
	writeToDisk: false,
	scdProps: {},

	funnels: [
		{
			// Onboarding
			sequence: ["account created", "profile updated", "post created"],
			isFirstFunnel: true,
			conversionRate: 70,
			timeToConvert: 0.5,
		},
		{
			// Feed consumption: view → like → comment (most common loop)
			sequence: ["post viewed", "post liked", "comment posted"],
			conversionRate: 45,
			timeToConvert: 0.5,
			weight: 6,
		},
		{
			// Content creation cycle: create → views → engagement
			sequence: ["post created", "post viewed", "post liked", "post shared"],
			conversionRate: 30,
			timeToConvert: 3,
			weight: 3,
		},
		{
			// Stories engagement
			sequence: ["story created", "story viewed", "dm sent"],
			conversionRate: 40,
			timeToConvert: 1,
			weight: 3,
		},
		{
			// Discovery and follow loop
			sequence: ["search performed", "post viewed", "user followed"],
			conversionRate: 35,
			timeToConvert: 1,
			weight: 2,
		},
		{
			// Notifications driving re-engagement
			sequence: ["notification received", "post viewed", "post liked"],
			conversionRate: 50,
			timeToConvert: 0.5,
			weight: 2,
		},
		{
			// Profile management and creator monetization
			sequence: ["profile updated", "creator subscription started", "post created"],
			conversionRate: 15,
			timeToConvert: 24,
			weight: 1,
		},
		{
			// Ad interaction and moderation
			sequence: ["ad viewed", "ad clicked", "report submitted"],
			conversionRate: 20,
			timeToConvert: 2,
			weight: 1,
		},
	],

	events: [
		{
			event: "account created",
			weight: 1,
			isFirstEvent: true,
			properties: {
				"signup_method": u.pickAWinner(["email", "google", "apple", "sso"]),
				"referred_by": u.pickAWinner(["organic", "friend", "ad", "influencer"]),
			}
		},
		{
			event: "post created",
			weight: 12,
			properties: {
				"post_type": u.pickAWinner(["text", "image", "video", "poll", "link"]),
				"character_count": u.weighNumRange(1, 280),
				"has_media": u.pickAWinner([true, false], 0.4),
				"hashtag_count": u.weighNumRange(0, 10, 0.5),
			}
		},
		{
			event: "post viewed",
			weight: 30,
			properties: {
				"post_type": u.pickAWinner(["text", "image", "video", "poll", "link"]),
				"view_duration_sec": u.weighNumRange(1, 120, 0.3, 5),
				"source": u.pickAWinner(["feed", "explore", "search", "profile", "notification"]),
			}
		},
		{
			event: "post liked",
			weight: 18,
			properties: {
				"post_type": u.pickAWinner(["text", "image", "video", "poll", "link"]),
			}
		},
		{
			event: "post shared",
			weight: 6,
			properties: {
				"share_destination": u.pickAWinner(["repost", "dm", "external", "copy_link"]),
			}
		},
		{
			event: "comment posted",
			weight: 10,
			properties: {
				"comment_length": u.weighNumRange(1, 500, 0.3, 20),
				"has_mention": u.pickAWinner([true, false, false]),
			}
		},
		{
			event: "user followed",
			weight: 8,
			properties: {
				"discovery_source": u.pickAWinner(["suggested", "search", "post", "profile", "mutual"]),
			}
		},
		{
			event: "user unfollowed",
			weight: 2,
			properties: {
				"reason": u.pickAWinner(["content_quality", "too_frequent", "lost_interest", "offensive"]),
			}
		},
		{
			event: "story viewed",
			weight: 15,
			properties: {
				"story_type": u.pickAWinner(["photo", "video", "text"]),
				"view_duration_sec": u.weighNumRange(1, 30, 0.5, 5),
				"completed": u.pickAWinner([true, false], 0.6),
			}
		},
		{
			event: "story created",
			weight: 5,
			properties: {
				"story_type": u.pickAWinner(["photo", "video", "text"]),
				"has_filter": u.pickAWinner([true, false], 0.5),
				"has_sticker": u.pickAWinner([true, false], 0.3),
			}
		},
		{
			event: "search performed",
			weight: 7,
			properties: {
				"search_type": u.pickAWinner(["users", "hashtags", "posts"]),
				"results_count": u.weighNumRange(0, 50, 0.5, 10),
			}
		},
		{
			event: "notification received",
			weight: 12,
			properties: {
				"notification_type": u.pickAWinner(["like", "follow", "comment", "mention", "trending"]),
				"clicked": u.pickAWinner([true, false], 0.4),
			}
		},
		{
			event: "dm sent",
			weight: 8,
			properties: {
				"message_type": u.pickAWinner(["text", "image", "voice", "link"]),
				"conversation_length": u.weighNumRange(1, 100),
			}
		},
		{
			event: "ad viewed",
			weight: 10,
			properties: {
				"ad_format": u.pickAWinner(["feed_native", "story", "banner", "video"]),
				"ad_category": u.pickAWinner(["retail", "tech", "food", "finance", "entertainment"]),
				"view_duration_sec": u.weighNumRange(1, 30, 0.3),
			}
		},
		{
			event: "ad clicked",
			weight: 2,
			properties: {
				"ad_format": u.pickAWinner(["feed_native", "story", "banner", "video"]),
				"ad_category": u.pickAWinner(["retail", "tech", "food", "finance", "entertainment"]),
			}
		},
		{
			event: "report submitted",
			weight: 1,
			properties: {
				"report_type": u.pickAWinner(["spam", "harassment", "misinformation", "hate_speech", "other"]),
				"content_type": u.pickAWinner(["post", "comment", "user", "dm"]),
			}
		},
		{
			event: "profile updated",
			weight: 3,
			properties: {
				"field_updated": u.pickAWinner(["bio", "avatar", "display_name", "privacy_settings", "interests"]),
			}
		},
		{
			event: "creator subscription started",
			weight: 2,
			properties: {
				"tier": u.pickAWinner(["basic", "premium", "vip"]),
				"price_usd": u.pickAWinner([4.99, 9.99, 19.99]),
			}
		},
	],

	superProps: {
		app_version: u.pickAWinner(["4.0", "4.1", "4.2", "4.3", "5.0"]),
		account_type: u.pickAWinner(["personal", "creator", "business"]),
	},

	userProps: {
		"follower_count": u.weighNumRange(0, 10000, 0.2, 50),
		"following_count": u.weighNumRange(0, 5000, 0.3, 100),
		"bio_length": u.weighNumRange(0, 160),
		"verified": u.pickAWinner([true, false], 0.05),
		"content_niche": u.pickAWinner(["lifestyle", "tech", "food", "fitness", "travel", "comedy", "news", "art"]),
	},

	groupKeys: [
		["community_id", 100, ["post created", "comment posted", "post liked", "post shared"]],
	],

	groupProps: {
		community_id: {
			"name": () => `${chance.word()} ${chance.pickone(["Hub", "Circle", "Squad", "Zone", "Space"])}`,
			"member_count": u.weighNumRange(50, 5000, 0.3, 200),
			"category": u.pickAWinner(["technology", "entertainment", "sports", "politics", "art", "science"]),
			"is_moderated": u.pickAWinner([true, false], 0.7),
		}
	},

	lookupTables: [],

	/**
	 * ARCHITECTED ANALYTICS HOOKS
	 *
	 * Dispatches to the helpers above, which plant 8 deliberate patterns in the data:
	 *
	 * 1. VIRAL CONTENT CASCADE: users with 10+ posts who win a 5% draw get 10-20 extra
	 *    views, likes and shares injected per post
	 * 2. FOLLOW-BACK SNOWBALL: users with 5+ "user followed" events get extra posts/comments
	 * 3. ALGORITHM CHANGE: the 70%-forced post-view source flips from feed to explore at day 45
	 * 4. ENGAGEMENT BAIT: a random 20% of post views are flagged and cut to 1-5 seconds
	 * 5. NOTIFICATION RE-ENGAGEMENT: after day 30, 30% of post views are attributed to notifications
	 * 6. CREATOR MONETIZATION: users with a creator subscription get 2 extra posts/stories per original
	 * 7. TOXICITY CHURN: users with 3+ reports lose 60% of their events after day 30
	 * 8. WEEKEND CONTENT SURGE: 30% of weekend posts/stories are duplicated
	 *
	 * Day offsets are measured from (time of first hook call - `days` days).
	 *
	 * @param {object | object[]} record - one event when type is "event"; the array of all
	 *   of a user's events when type is "everything"; anything else is passed through
	 * @param {string} type - which stage of generation is invoking the hook
	 * @param {object} meta - supplied by the caller; not used here
	 * @returns {object | object[]} the same `record` reference, mutated in place
	 */
	hook: function (record, type, meta) {
		const anchors = getHookAnchors();

		if (type === "event") {
			applyEventHooks(record, anchors);
		} else if (type === "everything" && Array.isArray(record) && record.length > 0) {
			applyUserHooks(record, anchors);
		}

		return record;
	}
};
636
+
637
+ export default config;
638
+
639
+ /**
640
+ * =====================================================================================
641
+ * NEEDLE IN A HAYSTACK - CHIRP SOCIAL MEDIA APP ANALYTICS
642
+ * =====================================================================================
643
+ *
644
+ * A Twitter+Instagram-style social media platform with 8 deliberately architected
645
+ * analytics insights hidden in the data. This dungeon simulates realistic social
646
+ * media behavioral patterns including viral cascades, algorithmic feed changes,
647
+ * creator economies, and content moderation challenges.
648
+ *
649
+ * =====================================================================================
650
+ * DATASET OVERVIEW
651
+ * =====================================================================================
652
+ *
653
+ * - 5,000 users over 100 days
654
+ * - 600,000 base events (5,000 users x 120) across 18 event types
655
+ * - 8 funnels (onboarding, feed engagement, content creation, stories, discovery, notifications, creator monetization, ads/moderation)
656
+ * - Group analytics (100 communities)
657
+ * - Lookup table: none emitted yet (post IDs are pre-generated, but lookupTables is empty)
658
+ * - Account types: personal, creator, business
659
+ *
660
+ * =====================================================================================
661
+ * THE 8 ARCHITECTED HOOKS
662
+ * =====================================================================================
663
+ *
664
+ * Each hook creates a specific, discoverable analytics insight that simulates
665
+ * real-world social media product behavior patterns.
666
+ *
667
+ * -------------------------------------------------------------------------------------
668
+ * 1. VIRAL CONTENT CASCADE (everything hook)
669
+ * -------------------------------------------------------------------------------------
670
+ *
671
+ * PATTERN: 5% of users who have created 10+ posts are tagged as "viral creators."
672
+ * Each of their posts generates 10-20 extra post viewed, post liked, and post shared
673
+ * events, all tagged with viral_cascade: true.
674
+ *
675
+ * HOW TO FIND IT:
676
+ * - Filter events where viral_cascade = true
677
+ * - Segment users by viral_cascade presence
678
+ * - Compare: engagement metrics (views, likes, shares) per post for viral vs. normal users
679
+ *
680
+ * EXPECTED INSIGHT: A small minority of users (at most ~250 of 5,000, since only users with 10+ posts take the 5% draw) drive a
681
+ * disproportionate share of total engagement. Viral creators generate 10-20x more
682
+ * views, likes, and shares per post than the average user.
683
+ *
684
+ * REAL-WORLD ANALOGUE: Power-law distribution in social media where a tiny fraction
685
+ * of creators generate the majority of platform engagement (the 1% rule).
686
+ *
687
+ * -------------------------------------------------------------------------------------
688
+ * 2. FOLLOW-BACK SNOWBALL (everything hook)
689
+ * -------------------------------------------------------------------------------------
690
+ *
691
+ * PATTERN: Users who receive 5 or more "user followed" events become prolific
692
+ * content creators. 50% of their post created events get duplicated (with
693
+ * follow_back_effect: true), and extra comment posted events are injected.
694
+ *
695
+ * HOW TO FIND IT:
696
+ * - Segment users by count of "user followed" events (5+ vs. fewer)
697
+ * - Compare: post creation frequency and comment frequency
698
+ * - Filter: follow_back_effect = true
699
+ *
700
+ * EXPECTED INSIGHT: Users who gain a following create significantly more content.
701
+ * The follow-back snowball creates a positive feedback loop: more followers ->
702
+ * more content -> more engagement -> more followers.
703
+ *
704
+ * REAL-WORLD ANALOGUE: Network effects in social media. Users who gain traction
705
+ * become more active, which further accelerates their growth. This is the
706
+ * mechanism behind "going viral" on platforms like Twitter/X and Instagram.
707
+ *
708
+ * -------------------------------------------------------------------------------------
709
+ * 3. ALGORITHM CHANGE (event hook)
710
+ * -------------------------------------------------------------------------------------
711
+ *
712
+ * PATTERN: On day 45 of the dataset, the content discovery algorithm changes.
713
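+ * Before day 45, a 70% draw forces source = "feed" on post viewed events. After day 45,
714
+ * the same 70% draw forces source = "explore." From day 30 on, hook 5 then re-labels 30% of views as "notification," so observed shares are lower.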
715
+ *
716
+ * HOW TO FIND IT:
717
+ * - Chart: post viewed events broken down by source over time
718
+ * - Look for the crossover point around day 45
719
+ * - Compare: engagement metrics before vs. after the algorithm change
720
+ *
721
+ * EXPECTED INSIGHT: A clear inflection point around day 45 where feed traffic
722
+ * drops and explore traffic surges. This simulates a real algorithm deployment
723
+ * and its impact on content discovery patterns.
724
+ *
725
+ * REAL-WORLD ANALOGUE: Platform algorithm changes (e.g., Instagram shifting from
726
+ * chronological feed to algorithmic recommendations, Twitter introducing "For You"
727
+ * tab). These changes fundamentally alter content distribution.
728
+ *
729
+ * -------------------------------------------------------------------------------------
730
+ * 4. ENGAGEMENT BAIT (event hook)
731
+ * -------------------------------------------------------------------------------------
732
+ *
733
+ * PATTERN: 20% of post viewed events are tagged as engagement_bait: true and
734
+ * have very short view durations (1-5 seconds). These represent clickbait or
735
+ * hashtag-stuffed content that attracts views but fails to hold attention.
736
+ *
737
+ * HOW TO FIND IT:
738
+ * - Filter: post viewed where engagement_bait = true
739
+ * - Compare: average view_duration_sec for engagement_bait vs. normal views
740
+ * - Correlate: engagement_bait with downstream actions (likes, comments, shares)
741
+ *
742
+ * EXPECTED INSIGHT: Engagement bait posts get views but have 5-10x shorter view
743
+ * durations. This creates a quality gap: high impression count but poor engagement
744
+ * quality. Users who consume engagement bait likely have lower satisfaction.
745
+ *
746
+ * REAL-WORLD ANALOGUE: Clickbait and hashtag abuse on social platforms. Content
747
+ * that games the algorithm for reach but delivers poor user experience.
748
+ *
749
+ * -------------------------------------------------------------------------------------
750
+ * 5. NOTIFICATION RE-ENGAGEMENT (event hook)
751
+ * -------------------------------------------------------------------------------------
752
+ *
753
+ * PATTERN: After day 30, 30% of post viewed events have their source overridden
754
+ * to "notification" and are tagged with trending_reengagement: true. This simulates
755
+ * the platform using trending notifications to re-engage lapsed users.
756
+ *
757
+ * HOW TO FIND IT:
758
+ * - Chart: post viewed by source over time, focusing on "notification" after day 30
759
+ * - Filter: trending_reengagement = true
760
+ * - Compare: notification-driven views vs. organic views in engagement quality
761
+ *
762
+ * EXPECTED INSIGHT: After day 30, notification-driven views spike as the platform
763
+ * pushes trending content to re-engage users. This creates a visible shift in the
764
+ * source distribution for post views.
765
+ *
766
+ * REAL-WORLD ANALOGUE: Push notification strategies used by social apps to
767
+ * re-engage dormant users with trending or personalized content (e.g., "You're
768
+ * missing out on what's trending").
769
+ *
770
+ * -------------------------------------------------------------------------------------
771
+ * 6. CREATOR MONETIZATION (everything hook)
772
+ * -------------------------------------------------------------------------------------
773
+ *
774
+ * PATTERN: Users who have any "creator subscription started" event post 3x more
775
+ * frequently. For each post created and story created event, 2 additional copies
776
+ * are injected with monetized_creator: true. They also check their own content
777
+ * more (extra post viewed events from "profile" source).
778
+ *
779
+ * HOW TO FIND IT:
780
+ * - Segment users by: has "creator subscription started" event
781
+ * - Compare: post creation and story creation frequency
782
+ * - Filter: monetized_creator = true
783
+ * - Compare: post viewed source = "profile" rate (analytics checking behavior)
784
+ *
785
+ * EXPECTED INSIGHT: Monetized creators produce 3x more content and check their
786
+ * own profiles more often. The subscription creates a financial incentive that
787
+ * dramatically increases content output.
788
+ *
789
+ * REAL-WORLD ANALOGUE: Creator monetization programs (YouTube Partner Program,
790
+ * TikTok Creator Fund, Twitter/X subscriptions) that incentivize consistent
791
+ * content production from top creators.
792
+ *
793
+ * -------------------------------------------------------------------------------------
794
+ * 7. TOXICITY CHURN (everything hook)
795
+ * -------------------------------------------------------------------------------------
796
+ *
797
+ * PATTERN: Users who submit 3 or more reports experience 60% event removal after
798
+ * day 30 of the dataset. Remaining events are tagged with toxic_user: true. These
799
+ * users encountered enough bad content to file multiple reports, and many of them
800
+ * churned as a result.
801
+ *
802
+ * HOW TO FIND IT:
803
+ * - Segment users by: count of "report submitted" events (3+ vs. fewer)
804
+ * - Compare: event volume before and after day 30
805
+ * - Filter: toxic_user = true
806
+ * - Compare: D30+ retention rates
807
+ *
808
+ * EXPECTED INSIGHT: Users who report 3+ pieces of content show a dramatic drop
809
+ * in activity after day 30. This simulates the real pattern where users exposed
810
+ * to toxic content eventually leave the platform.
811
+ *
812
+ * REAL-WORLD ANALOGUE: Content moderation challenges on social platforms.
813
+ * Users who encounter repeated toxic content (and report it) eventually churn,
814
+ * even though they're the "good actors" trying to improve the platform.
815
+ *
816
+ * -------------------------------------------------------------------------------------
817
+ * 8. WEEKEND CONTENT SURGE (event hook)
818
+ * -------------------------------------------------------------------------------------
819
+ *
820
+ * PATTERN: Post created and story created events that fall on Saturday or Sunday
821
+ * are tagged with weekend_surge: true. 30% of these weekend events generate a
822
+ * duplicate event 1-3 hours later (tagged weekend_duplicate: true).
823
+ *
824
+ * HOW TO FIND IT:
825
+ * - Chart: post created and story created events by day of week
826
+ * - Filter: weekend_surge = true or weekend_duplicate = true
827
+ * - Compare: weekday vs. weekend content creation volumes
828
+ *
829
+ * EXPECTED INSIGHT: Saturdays and Sundays show roughly 30% more content creation
830
+ * than weekdays. The weekly pattern is clearly visible in a time-series chart,
831
+ * creating a sawtooth pattern in content creation volume.
832
+ *
833
+ * REAL-WORLD ANALOGUE: Real social media usage patterns where users have more
834
+ * leisure time on weekends, leading to increased content creation and consumption.
835
+ * Most social platforms see clear weekly seasonality.
836
+ *
837
+ * =====================================================================================
838
+ * EXPECTED METRICS SUMMARY
839
+ * =====================================================================================
840
+ *
841
+ * Hook | Metric | Baseline | Hook Effect | Ratio
842
+ * --------------------------|-------------------------|--------------|----------------|-------
843
+ * Viral Content Cascade | Engagement per post | 1-2x | 10-20x | ~15x
844
+ * Follow-Back Snowball | Posts per user | ~4 | ~8 | 2x
845
+ * Algorithm Change | Feed vs. Explore source | 70/15 | 15/70 | Flip
846
+ * Engagement Bait | View duration (sec) | 15-30 | 1-5 | ~0.2x
847
+ * Notification Re-engage | Notification source % | ~10% | ~30% | 3x
848
+ * Creator Monetization | Content creation freq | 1x | 3x | 3x
849
+ * Toxicity Churn | Post-day-30 retention | ~80% | ~40% | 0.5x
850
+ * Weekend Content Surge | Weekend vs. weekday vol | 1x | 1.3x | 1.3x
851
+ *
852
+ * =====================================================================================
853
+ * CROSS-HOOK ANALYSIS IDEAS
854
+ * =====================================================================================
855
+ *
856
+ * 1. Viral Creators + Algorithm Change:
857
+ * Do viral creators benefit more from the explore-based algorithm? Compare viral
858
+ * cascade engagement before and after day 45. The explore algorithm may amplify
859
+ * viral content even further.
860
+ *
861
+ * 2. Follow-Back Snowball + Creator Monetization:
862
+ * Users who gain followers AND start creator subscriptions should be the most
863
+ * prolific content producers. The two hooks compound: 2x from follows * 3x from
864
+ * monetization = 6x content output.
865
+ *
866
+ * 3. Engagement Bait + Toxicity Churn:
867
+ * Do users who consume high amounts of engagement bait also submit more reports?
868
+ * Is there a correlation between engagement_bait exposure and toxic_user tagging?
869
+ *
870
+ * 4. Weekend Surge + Viral Cascade:
871
+ * Are viral cascades more likely on weekends when more content is created?
872
+ * The compounding of weekend surge + viral cascade should create extreme
873
+ * engagement spikes on weekend days.
874
+ *
875
+ * 5. Notification Re-engagement + Toxicity Churn:
876
+ * Do trending notifications help retain toxic_user-tagged users, or do they
877
+ * still churn despite re-engagement efforts?
878
+ *
879
+ * 6. Algorithm Change + Engagement Bait:
880
+ * Does the shift from feed to explore change the proportion of engagement bait?
881
+ * The explore algorithm may surface different content quality than the feed.
882
+ *
883
+ * 7. Creator Monetization + Viral Cascade:
884
+ * Monetized creators who are also viral should have astronomical engagement.
885
+ * These are the platform's most valuable users.
886
+ *
887
+ * 8. Follow-Back Snowball + Toxicity Churn:
888
+ * Do users who gain many followers also attract more reports? Is popularity
889
+ * correlated with toxicity exposure?
890
+ *
891
+ * =====================================================================================
892
+ * COHORT ANALYSIS IDEAS
893
+ * =====================================================================================
894
+ *
895
+ * - Cohort by signup method: Do google/apple signups retain better than email?
896
+ * - Cohort by content niche: Which niches produce the most viral creators?
897
+ * - Cohort by account type: How do personal vs. creator vs. business accounts differ?
898
+ * - Cohort by community membership: Do community members engage more?
899
+ * - Cohort by week: Users who joined during the algorithm change (day 45) see a
900
+ * fundamentally different product experience.
901
+ *
902
+ * =====================================================================================
903
+ * FUNNEL ANALYSIS IDEAS
904
+ * =====================================================================================
905
+ *
906
+ * - Onboarding Funnel: account created -> profile updated -> post created
907
+ * How does signup method affect onboarding completion?
908
+ * - Content Engagement Funnel: post viewed -> post liked -> comment posted
909
+ * Compare conversion by source (feed vs. explore vs. notification)
910
+ * - Creator Journey Funnel: post created -> post viewed -> post liked -> post shared
911
+ * How does the algorithm change affect creator content reach?
912
+ *
913
+ * =====================================================================================
914
+ * HOW TO RUN THIS DUNGEON
915
+ * =====================================================================================
916
+ *
917
+ * From the dm4 root directory:
918
+ *
919
+ * npm start
920
+ *
921
+ * Or programmatically:
922
+ *
923
+ * import generate from './index.js';
924
+ * import config from './dungeons/social.js';
925
+ * const results = await generate(config);
926
+ *
927
+ * =====================================================================================
928
+ */