make-mp-data 3.0.4 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/README.md +46 -0
  2. package/dungeons/array-of-object-lookup-schema.json +327 -0
  3. package/dungeons/array-of-object-lookup.js +28 -8
  4. package/dungeons/capstone/capstone-ic3.js +291 -0
  5. package/dungeons/capstone/capstone-ic4.js +598 -0
  6. package/dungeons/capstone/capstone-ic5.js +668 -0
  7. package/dungeons/capstone/generate-product-lookup.js +309 -0
  8. package/dungeons/ecommerce-schema.json +462 -0
  9. package/dungeons/{copilot.js → ecommerce.js} +77 -15
  10. package/dungeons/education-schema.json +2409 -0
  11. package/dungeons/education.js +206 -442
  12. package/dungeons/fintech-schema.json +14034 -0
  13. package/dungeons/fintech.js +110 -389
  14. package/dungeons/foobar-schema.json +403 -0
  15. package/dungeons/foobar.js +27 -4
  16. package/dungeons/food-delivery-schema.json +192 -0
  17. package/dungeons/food-delivery.js +602 -0
  18. package/dungeons/food-schema.json +1152 -0
  19. package/dungeons/food.js +150 -383
  20. package/dungeons/gaming-schema.json +1270 -0
  21. package/dungeons/gaming.js +143 -3
  22. package/dungeons/insurance-application-schema.json +204 -0
  23. package/dungeons/insurance-application.js +605 -0
  24. package/dungeons/media-schema.json +906 -0
  25. package/dungeons/media.js +221 -391
  26. package/dungeons/retention-cadence-schema.json +78 -0
  27. package/dungeons/retention-cadence.js +35 -1
  28. package/dungeons/rpg-schema.json +4526 -0
  29. package/dungeons/rpg.js +130 -388
  30. package/dungeons/sanity-schema.json +255 -0
  31. package/dungeons/sanity.js +21 -10
  32. package/dungeons/sass-schema.json +1291 -0
  33. package/dungeons/sass.js +210 -337
  34. package/dungeons/scd-schema.json +919 -0
  35. package/dungeons/scd.js +38 -10
  36. package/dungeons/simple-schema.json +608 -0
  37. package/dungeons/simple.js +48 -11
  38. package/dungeons/simplest-schema.json +1418 -0
  39. package/dungeons/simplest.js +392 -0
  40. package/dungeons/social-schema.json +1118 -0
  41. package/dungeons/social.js +124 -365
  42. package/dungeons/text-generation-schema.json +3096 -0
  43. package/dungeons/text-generation.js +71 -0
  44. package/index.js +6 -3
  45. package/lib/core/config-validator.js +18 -0
  46. package/lib/core/storage.js +5 -5
  47. package/lib/generators/events.js +4 -4
  48. package/lib/orchestrators/mixpanel-sender.js +12 -7
  49. package/lib/orchestrators/user-loop.js +14 -6
  50. package/lib/templates/soup-presets.js +188 -0
  51. package/lib/utils/utils.js +52 -6
  52. package/package.json +1 -1
  53. package/types.d.ts +20 -3
  54. package/dungeons/adspend.js +0 -117
  55. package/dungeons/anon.js +0 -128
  56. package/dungeons/benchmark-heavy.js +0 -240
  57. package/dungeons/benchmark-light.js +0 -126
  58. package/dungeons/big.js +0 -226
  59. package/dungeons/business.js +0 -391
  60. package/dungeons/complex.js +0 -428
  61. package/dungeons/experiments.js +0 -137
  62. package/dungeons/funnels.js +0 -309
  63. package/dungeons/mil.js +0 -323
  64. package/dungeons/mirror.js +0 -160
  65. package/dungeons/soup-test.js +0 -52
  66. package/dungeons/streaming.js +0 -372
  67. package/dungeons/strict-event-test.js +0 -30
  68. package/dungeons/student-teacher.js +0 -438
  69. package/dungeons/too-big-events.js +0 -203
  70. package/dungeons/user-agent.js +0 -209
@@ -10,79 +10,167 @@ const chance = u.initChance(SEED);
10
10
  const num_users = 5_000;
11
11
  const days = 100;
12
12
 
13
- /** @typedef {import("../../types.js").Dungeon} Config */
13
+ /** @typedef {import("../types.d.ts").Dungeon} Config */
14
14
 
15
15
  /**
16
- * NEEDLE IN A HAYSTACK - ELEARNING APP DESIGN
16
+ * ═══════════════════════════════════════════════════════════════════════════════
17
+ * DATASET OVERVIEW — LearnPath eLearning Platform
18
+ * ═══════════════════════════════════════════════════════════════════════════════
17
19
  *
18
- * LearnPath - An online learning platform modeled after Coursera, Khan Academy, and Udemy.
19
- * The platform supports both self-paced and cohort-based learning, with a robust ecosystem
20
- * of courses, quizzes, assignments, and social study features.
20
+ * An online learning platform modeled after Coursera, Khan Academy, and Udemy.
21
+ * Supports self-paced and cohort-based learning with courses, quizzes,
22
+ * assignments, and social study features.
23
+ *
24
+ * Scale: 5,000 users / 600K events / 100 days / 17 event types
25
+ *
26
+ * CORE LOOP:
27
+ * Register → browse/enroll in courses → watch lectures → practice problems →
28
+ * quizzes/assignments → certificate earned. Social layer (study groups,
29
+ * discussions) drives retention. Subscription tiers (free/monthly/annual)
30
+ * gate completion rates.
31
+ *
32
+ * FUNNELS:
33
+ * - Onboarding: account registered → course enrolled → lecture started
34
+ * - Learning loop: lecture started → lecture completed → practice problem solved
35
+ * - Assessment: quiz started → quiz completed → assignment submitted
36
+ * - Course completion: course enrolled → lecture completed → quiz completed → certificate earned
37
+ * - Social learning: discussion posted → study group joined → resource downloaded
38
+ * - Instructor interaction: assignment submitted → assignment graded → instructor feedback given
39
+ * - Support/monetization: help requested → subscription purchased → course reviewed
40
+ *
41
+ * GROUPS: course_id (150 courses), group_id (300 study groups)
42
+ * SUBSCRIPTIONS: free (~60%), monthly, annual
43
+ * ACCOUNT TYPES: ~89% students, ~11% instructors (two-sided marketplace)
44
+ */
45
+
46
+ /**
47
+ * ═══════════════════════════════════════════════════════════════════════════════
48
+ * ANALYTICS HOOKS (8 architected patterns)
49
+ * ═══════════════════════════════════════════════════════════════════════════════
21
50
  *
22
- * CORE LEARNING LOOP:
23
- * Students register accounts, browse and enroll in courses across multiple categories
24
- * (CS, Math, Science, Business, Arts, Languages), then progress through structured
25
- * modules consisting of lectures, practice problems, quizzes, and assignments. Successful
26
- * completion of all requirements earns a certificate. The platform emphasizes active
27
- * learning through note-taking, practice problems, and peer discussion.
51
+ * 1. STUDENT VS INSTRUCTOR PROFILES
52
+ * Instructor profiles get teaching attributes (courses_created,
53
+ * teaching_experience_years, instructor_rating). Students get learning
54
+ * attributes (learning_goal, study_hours_per_week).
28
55
  *
29
- * COURSE SYSTEM (events: course enrolled -> lecture started -> lecture completed):
30
- * - Six course categories spanning technical and creative disciplines
31
- * - Three difficulty tiers: beginner, intermediate, advanced
32
- * - Free and paid course options (drives subscription analytics)
33
- * - 150 unique courses with varying lengths, ratings, and enrollment counts
34
- * - Modules (1-12 per course) contain lectures, quizzes, and assignments
56
+ * Mixpanel reports:
57
+ * • Insights → Any event → Unique users → Breakdown: "account_type"
58
+ * Expected: ~89% students, ~11% instructors
59
+ * • Insights → "instructor feedback given" → Total per user → Breakdown: "account_type"
60
+ * Expected: Instructors dominate feedback; students show learning_goal instead
35
61
  *
36
- * LECTURE EXPERIENCE (events: lecture started -> lecture completed):
37
- * - Variable lecture durations (5-60 minutes) reflecting real MOOC patterns
38
- * - Playback speed options (0.75x to 2.0x) reveal learning style differences
39
- * - Note-taking tracking creates a behavioral signal for student diligence
40
- * - Watch time vs. lecture duration measures actual engagement
62
+ * 2. DEADLINE CRAMMING
63
+ * Assignments submitted on Sun/Mon are rushed: 60% late (vs ~20% baseline),
64
+ * quiz scores drop by 25 points. Events carry is_deadline_rush: true.
41
65
  *
42
- * ASSESSMENT SYSTEM (events: quiz started -> quiz completed, assignment submitted -> graded):
43
- * - Practice quizzes (low stakes, unlimited attempts) vs. graded quizzes vs. final exams
44
- * - Assignments support text, code, file upload, and project submissions
45
- * - Grading by instructors, peers, or auto-grader (reflects real platform patterns)
46
- * - Score tracking enables learning outcome analytics
66
+ * Mixpanel reports:
67
+ * • Insights → "assignment submitted" → Total → Breakdown: "is_deadline_rush"
68
+ * Expected: is_deadline_rush=true shows ~60% late rate vs ~20% baseline
69
+ * • Insights → "quiz completed" → Avg "score_percent" → Breakdown: Day of Week
70
+ * Expected: Sun/Mon scores ~25 points lower (~40 vs ~65)
47
71
  *
48
- * PRACTICE PROBLEMS (event: practice problem solved):
49
- * - Difficulty-tiered problems (easy, medium, hard) for skill building
50
- * - Hint system creates a measurable dependency pattern
51
- * - Time-to-solve metrics reveal mastery progression
52
- * - High volume (weight: 12) reflects real platform usage patterns
72
+ * 3. NOTES-TAKERS SUCCEED
73
+ * Students with 5+ notes_taken=true lectures get +20 quiz score boost
74
+ * (capped at 100) and 40% chance of bonus certificate. Marked diligent_student: true.
53
75
  *
54
- * SOCIAL LEARNING (events: discussion posted, study group joined):
55
- * - Discussion forums with questions, answers, and comments
56
- * - Study groups (study circles, project teams, tutoring groups)
57
- * - Social features drive retention (a key hook pattern)
76
+ * Mixpanel reports:
77
+ * • Insights → "quiz completed" → Avg "score_percent" → Breakdown: "diligent_student"
78
+ * Expected: diligent_student=true ≈ 85 avg vs ~65 baseline (+20 pts)
79
+ * • Insights → "certificate earned" → Total per user → Breakdown: "diligent_student"
80
+ * Expected: diligent_student=true earn ~40% more certificates
58
81
  *
59
- * INSTRUCTOR ECOSYSTEM (events: instructor feedback given, assignment graded):
60
- * - Instructors create courses, grade assignments, and provide feedback
61
- * - Written, video, and rubric-based feedback types
62
- * - Response time tracking (1-72 hours) measures instructor engagement
82
+ * 4. STUDY GROUP RETENTION
83
+ * Early study group joiners (within 10 days) retain and get bonus discussions.
84
+ * Non-joiners with low quiz scores (<60) churn hard at day 14 (all later events removed).
63
85
  *
64
- * MONETIZATION (event: subscription purchased):
65
- * - Three tiers: monthly ($19.99), annual ($149.99), lifetime ($499.99)
66
- * - Free tier with limited access (most users)
67
- * - Subscription status affects course completion funnels (Hook #7)
86
+ * Mixpanel reports:
87
+ * • Retention → A: "account registered" → B: Any event → Segment by early study group join
88
+ * Expected: Early joiners ~90% D14 retention; non-joiners with low scores ~30%
89
+ * • Insights → "discussion posted" → Total per user → Breakdown: "study_group_member"
90
+ * Expected: study_group_member=true users post more
68
91
  *
69
- * SUPPORT SYSTEM (event: help requested):
70
- * - Four topic categories: technical, content, billing, accessibility
71
- * - Three channels: chat, email, forum
72
- * - Tracks student friction points
92
+ * 5. HINT DEPENDENCY
93
+ * Hint users get 60% chance of easy problems; non-hint users get 40% chance of
94
+ * hard problems with independent_solver: true.
95
+ *
96
+ * Mixpanel reports:
97
+ * • Insights → "practice problem solved" → Total → Breakdown: "difficulty" → Filter: hint_used=true
98
+ * Expected: ~60% easy (vs ~33% baseline)
99
+ * • Insights → "practice problem solved" → Total → Breakdown: "difficulty" → Filter: hint_used=false
100
+ * Expected: ~40% hard (vs ~33% baseline)
101
+ *
102
+ * 6. SEMESTER-END SPIKE
103
+ * Days 75-85: quiz_started, quiz_completed, assignment_submitted events duplicated
104
+ * at 80% rate. Events carry semester_end_rush: true.
73
105
  *
74
- * COURSE REVIEWS (event: course reviewed):
75
- * - 1-5 star ratings with written reviews
76
- * - Would-recommend boolean for NPS-style analysis
77
- * - Review length correlates with sentiment strength
106
+ * Mixpanel reports:
107
+ * • Insights (line) → "quiz started" + "quiz completed" + "assignment submitted" → Daily
108
+ * Expected: ~2x volume spike during days 75-85
109
+ * • Insights → "quiz completed" → Total → Breakdown: "semester_end_rush"
110
+ * Expected: semester_end_rush=true clusters in days 75-85
111
+ *
112
+ * 7. FREE VS PAID COURSES
113
+ * Free users get 0.5x funnel conversion rate; paid subscribers get 1.5x.
114
+ * Free users also lose 55% of certificates. Creates ~2.2x completion gap.
115
+ *
116
+ * Mixpanel reports:
117
+ * • Funnels → "course enrolled" → "lecture completed" → "quiz completed" → "certificate earned"
118
+ * Breakdown: "subscription_status"
119
+ * Expected: free ≈ 15% completion, paid ≈ 33% (~2.2x difference)
120
+ * • Insights → "certificate earned" → Total per user → Breakdown: "subscription_status"
121
+ * Expected: Paid subscribers earn significantly more certificates
122
+ *
123
+ * 8. PLAYBACK SPEED CORRELATION
124
+ * Speed learners (>=2.0x, 3+ lectures): compressed watch_time (0.6x),
125
+ * paradoxically higher quiz scores (+8 pts). Thorough learners (<=1.0x):
126
+ * extended watch_time (1.4x).
127
+ *
128
+ * Mixpanel reports:
129
+ * • Insights → "lecture completed" → Avg "watch_time_mins" → Breakdown: "speed_learner"
130
+ * Expected: speed_learner=true ≈ 0.6x watch time
131
+ * • Insights → "quiz completed" → Avg "score_percent" → Breakdown: "speed_learner_effect"
132
+ * Expected: speed_learner_effect=true shows +8 points (faster = better)
133
+ *
134
+ * ═══════════════════════════════════════════════════════════════════════════════
135
+ * ADVANCED ANALYSIS IDEAS
136
+ * ═══════════════════════════════════════════════════════════════════════════════
137
+ *
138
+ * CROSS-HOOK PATTERNS:
139
+ * - The Ideal Student: notes (H3) + study groups (H4) + no hints (H5) + paid (H7) + speed (H8)
140
+ * - Cramming Cascade: deadline crammers (H2) compounded with semester-end spike (H6)?
141
+ * - Social Safety Net: does early study group joining (H4) prevent churn for low scorers?
142
+ * - Hint-to-Mastery: do hint-dependent (H5) students who join groups (H4) wean off hints?
143
+ * - Payment + Notes: are paid subscribers (H7) more likely to take notes (H3)?
144
+ *
145
+ * COHORT ANALYSIS:
146
+ * - By education level: PhD vs self-taught hook patterns
147
+ * - By learning style: visual vs hands-on note-taking rates
148
+ * - By platform: mobile vs desktop playback speed preferences
149
+ * - By course category: CS vs Arts hint usage
78
150
  *
79
- * WHY THESE EVENTS/PROPERTIES?
80
- * - Events model the complete student lifecycle: onboarding -> engagement -> mastery -> certification
81
- * - Properties enable cohort analysis: learning style, education level, account type, subscription status
82
- * - Funnels reveal friction: where do students drop off in onboarding, course completion, practice mastery?
83
- * - Behavioral signals (notes, hints, playback speed, study groups) create discoverable skill gaps
84
- * - Social features (study groups, discussions) and monetization (subscriptions) drive business metrics
85
- * - The "needle in haystack" hooks simulate real EdTech insights hidden in production data
151
+ * FUNNEL ANALYSIS:
152
+ * - Onboarding by account_type
153
+ * - Course completion by subscription, notes, study groups
154
+ * - Practice mastery by hint usage, speed, learning style
155
+ *
156
+ * ═══════════════════════════════════════════════════════════════════════════════
157
+ * EXPECTED METRICS SUMMARY
158
+ * ═══════════════════════════════════════════════════════════════════════════════
159
+ *
160
+ * Hook | Metric | Baseline | Hook Effect | Ratio
161
+ * ────────────────────────|───────────────────────|──────────|──────────────|──────
162
+ * Student vs Instructor | Profile attributes | generic | role-specific| N/A
163
+ * Deadline Cramming | Late submission rate | ~20% | ~60% | 3x
164
+ * Deadline Cramming | Quiz score (Sun/Mon) | ~65 | ~40 | -25pt
165
+ * Notes-Takers Succeed | Quiz score | ~65 | ~85 | +20pt
166
+ * Notes-Takers Succeed | Certificate rate | baseline | +40% | 1.4x
167
+ * Study Group Retention | D14 retention | ~40% | ~90% | 2.3x
168
+ * Study Group Retention | Post-D14 events | 100% | 0% (hard churn) | 0x
169
+ * Hint Dependency | Easy problem rate | ~33% | ~60% | 1.8x
170
+ * Hint Dependency | Hard problem rate | ~33% | ~40% (no hint)| 1.2x
171
+ * Semester-End Spike | Assessment volume | baseline | ~2x | 2x
172
+ * Free vs Paid | Course completion | 15% | 33% | 2.2x
173
+ * Playback Speed | Quiz score (speed) | ~65 | ~73 | +8pt
86
174
  */
87
175
 
88
176
  // Generate consistent IDs for lookup tables and event properties
@@ -339,13 +427,13 @@ const config = {
339
427
 
340
428
  superProps: {
341
429
  platform: ["Web", "iOS", "Android", "iPad"],
342
- subscription_status: u.pickAWinner(["free", "free", "free", "monthly", "annual"]),
343
430
  },
344
431
 
345
432
  scdProps: {},
346
433
 
347
434
  userProps: {
348
435
  "account_type": u.pickAWinner(["student", "student", "student", "student", "student", "student", "student", "student", "instructor"]),
436
+ "subscription_status": u.pickAWinner(["free", "free", "free", "monthly", "annual"]),
349
437
  "learning_style": ["visual", "reading", "hands_on", "auditory"],
350
438
  "education_level": ["high_school", "bachelors", "masters", "phd", "self_taught"],
351
439
  "timezone": ["US_Eastern", "US_Pacific", "US_Central", "Europe", "Asia"],
@@ -420,14 +508,9 @@ const config = {
420
508
  }
421
509
  }
422
510
 
423
- if (record.event === "quiz completed" && record.time) {
424
- const eventDay = dayjs(record.time).day();
425
- if (eventDay === 0 || eventDay === 1) {
426
- if (record.score_percent !== undefined) {
427
- record.score_percent = Math.max(0, record.score_percent - 15);
428
- }
429
- }
430
- }
511
+ // Quiz score penalty moved to everything hook (after churn removal)
512
+ // to avoid selection bias — the penalty was causing more Sun/Mon
513
+ // quiz-takers to trigger hasLowQuizScore churn, inflating their avg
431
514
  }
432
515
 
433
516
  // ═══════════════════════════════════════════════════════════════════
@@ -481,7 +564,7 @@ const config = {
481
564
  if (record.event === "lecture completed") {
482
565
  const speed = record.playback_speed;
483
566
 
484
- if (speed >= 1.5) {
567
+ if (speed >= 2.0) {
485
568
  record.speed_learner = true;
486
569
  record.thorough_learner = false;
487
570
  // Compress watch time for speed learners
@@ -572,42 +655,14 @@ const config = {
572
655
  }
573
656
  }
574
657
 
575
- // Hook #4: STUDY GROUP RETENTION
576
- if (!joinedStudyGroupEarly && hasLowQuizScore) {
577
- // Non-joiners with low scores: remove 70% of events after day 14 (churn)
578
- const churnCutoff = firstEventTime ? firstEventTime.add(14, 'days') : null;
579
- for (let i = userEvents.length - 1; i >= 0; i--) {
580
- const evt = userEvents[i];
581
- if (churnCutoff && dayjs(evt.time).isAfter(churnCutoff)) {
582
- if (chance.bool({ likelihood: 70 })) {
583
- userEvents.splice(i, 1);
584
- }
585
- }
586
- }
587
- } else if (joinedStudyGroupEarly) {
588
- // Study group joiners keep all events and get bonus discussion_posted events
589
- const lastEvent = userEvents[userEvents.length - 1];
590
- if (lastEvent && chance.bool({ likelihood: 60 })) {
591
- const bonusDiscussion = {
592
- event: "discussion posted",
593
- time: dayjs(lastEvent.time).add(chance.integer({ min: 1, max: 3 }), 'days').toISOString(),
594
- user_id: lastEvent.user_id,
595
- course_id: chance.pickone(courseIds),
596
- post_type: chance.pickone(["question", "answer", "comment"]),
597
- word_count: chance.integer({ min: 20, max: 400 }),
598
- study_group_member: true,
599
- };
600
- userEvents.push(bonusDiscussion);
601
- }
602
- }
603
-
604
- // Hook #8 (everything pass): Speed learners get slightly HIGHER quiz scores
605
- let isSpeedLearner = false;
658
+ // Hook #8 (everything pass): Speed learners (3+ lectures at 2.0x) get higher quiz scores
659
+ let speedLectureCount = 0;
606
660
  userEvents.forEach((event) => {
607
661
  if (event.event === "lecture completed" && event.speed_learner === true) {
608
- isSpeedLearner = true;
662
+ speedLectureCount++;
609
663
  }
610
664
  });
665
+ const isSpeedLearner = speedLectureCount >= 3;
611
666
 
612
667
  if (isSpeedLearner) {
613
668
  userEvents.forEach((event) => {
@@ -623,7 +678,7 @@ const config = {
623
678
  // Hook #6: SEMESTER-END SPIKE - duplicate assessment events in the spike window
624
679
  const duplicates = [];
625
680
  userEvents.forEach((event) => {
626
- if (event.semester_end_rush === true && chance.bool({ likelihood: 50 })) {
681
+ if (event.semester_end_rush === true && chance.bool({ likelihood: 80 })) {
627
682
  const dup = JSON.parse(JSON.stringify(event));
628
683
  dup.time = dayjs(event.time).add(chance.integer({ min: 5, max: 120 }), 'minutes').toISOString();
629
684
  dup.semester_end_rush = true;
@@ -635,15 +690,57 @@ const config = {
635
690
  }
636
691
 
637
692
  // Hook #7: FREE VS PAID - reinforce the subscription effect on certificates
638
- const subStatus = userEvents.length > 0 ? userEvents[0].subscription_status : "free";
693
+ const subStatus = meta && meta.profile ? meta.profile.subscription_status : "free";
639
694
  if (subStatus === "free") {
640
- // Free users lose 40% of their certificates (simulating lower completion)
695
+ // Free users lose 55% of their certificates (simulating lower completion)
641
696
  for (let i = userEvents.length - 1; i >= 0; i--) {
642
- if (userEvents[i].event === "certificate earned" && chance.bool({ likelihood: 40 })) {
697
+ if (userEvents[i].event === "certificate earned" && chance.bool({ likelihood: 55 })) {
643
698
  userEvents.splice(i, 1);
644
699
  }
645
700
  }
646
701
  }
702
+
703
+ // Hook #4: STUDY GROUP RETENTION (runs near the end so churn removal isn't undone by later hooks; only the Hook #2b score penalty follows)
704
+ if (!joinedStudyGroupEarly && hasLowQuizScore) {
705
+ // Non-joiners with low scores: remove ALL events after day 14 from their first event (hard churn)
706
+ const churnCutoff = firstEventTime ? firstEventTime.add(14, 'days') : null;
707
+ for (let i = userEvents.length - 1; i >= 0; i--) {
708
+ const evt = userEvents[i];
709
+ if (churnCutoff && dayjs(evt.time).isAfter(churnCutoff)) {
710
+ userEvents.splice(i, 1);
711
+ }
712
+ }
713
+ } else if (joinedStudyGroupEarly) {
714
+ // Study group joiners keep all events and have a 60% chance of one bonus "discussion posted" event
715
+ const lastEvent = userEvents[userEvents.length - 1];
716
+ if (lastEvent && chance.bool({ likelihood: 60 })) {
717
+ const bonusDiscussion = {
718
+ event: "discussion posted",
719
+ time: dayjs(lastEvent.time).add(chance.integer({ min: 1, max: 3 }), 'days').toISOString(),
720
+ user_id: lastEvent.user_id,
721
+ course_id: chance.pickone(courseIds),
722
+ post_type: chance.pickone(["question", "answer", "comment"]),
723
+ word_count: chance.integer({ min: 20, max: 400 }),
724
+ study_group_member: true,
725
+ };
726
+ userEvents.push(bonusDiscussion);
727
+ }
728
+ }
729
+
730
+ // Hook #2b: DEADLINE CRAMMING (quiz score penalty)
731
+ // Applied LAST to avoid selection bias — if applied before churn,
732
+ // the penalty pushes Sun/Mon quiz-takers below the hasLowQuizScore
733
+ // threshold, selectively churning them and inflating the avg.
734
+ userEvents.forEach((event) => {
735
+ if (event.event === "quiz completed" && event.time) {
736
+ const eventDay = dayjs(event.time).day();
737
+ if (eventDay === 0 || eventDay === 1) {
738
+ if (event.score_percent !== undefined) {
739
+ event.score_percent = Math.max(0, event.score_percent - 25);
740
+ }
741
+ }
742
+ }
743
+ });
647
744
  }
648
745
 
649
746
  // ═══════════════════════════════════════════════════════════════════
@@ -655,11 +752,11 @@ const config = {
655
752
  const subscriptionStatus = meta.profile.subscription_status;
656
753
 
657
754
  if (subscriptionStatus === "free") {
658
- // Free users convert at 0.6x rate
659
- record.conversionRate = (record.conversionRate || 0.25) * 0.6;
755
+ // Free users convert at 0.5x rate
756
+ record.conversionRate = (record.conversionRate || 0.25) * 0.5;
660
757
  } else if (subscriptionStatus === "monthly" || subscriptionStatus === "annual") {
661
- // Paid subscribers convert at 1.3x rate
662
- record.conversionRate = (record.conversionRate || 0.25) * 1.3;
758
+ // Paid subscribers convert at 1.5x rate
759
+ record.conversionRate = (record.conversionRate || 0.25) * 1.5;
663
760
  }
664
761
  }
665
762
  }
@@ -669,336 +766,3 @@ const config = {
669
766
  };
670
767
 
671
768
  export default config;
672
-
673
- /**
674
- * ═══════════════════════════════════════════════════════════════════════════════
675
- * NEEDLE IN A HAYSTACK - LEARNPATH ELEARNING ANALYTICS
676
- * ═══════════════════════════════════════════════════════════════════════════════
677
- *
678
- * An online learning platform dungeon with 8 deliberately architected analytics
679
- * insights hidden in the data. This dungeon simulates a real EdTech product
680
- * (like Coursera, Khan Academy, or Udemy) and demonstrates how meaningful
681
- * student behavior patterns can be discovered through product analytics.
682
- *
683
- * ═══════════════════════════════════════════════════════════════════════════════
684
- * DATASET OVERVIEW
685
- * ═══════════════════════════════════════════════════════════════════════════════
686
- *
687
- * - 5,000 users over 100 days
688
- * - 360K events across 17 event types
689
- * - 3 funnels (student onboarding, course completion, practice mastery)
690
- * - Group analytics (courses, study groups)
691
- * - Lookup tables (courses, quizzes)
692
- * - Subscription tiers (free, monthly, annual)
693
- *
694
- * ═══════════════════════════════════════════════════════════════════════════════
695
- * THE 8 ARCHITECTED HOOKS
696
- * ═══════════════════════════════════════════════════════════════════════════════
697
- *
698
- * Each hook creates a specific, discoverable analytics insight that simulates
699
- * real-world EdTech behavior patterns.
700
- *
701
- * ───────────────────────────────────────────────────────────────────────────────
702
- * 1. STUDENT VS INSTRUCTOR PROFILES
703
- * ───────────────────────────────────────────────────────────────────────────────
704
- *
705
- * PATTERN: User profiles are enriched based on account_type. Instructors receive
706
- * teaching attributes (courses_created, teaching_experience_years, instructor_rating).
707
- * Students receive learning attributes (learning_goal, study_hours_per_week).
708
- *
709
- * HOW TO FIND IT:
710
- * - Segment users by: account_type = "instructor" vs "student"
711
- * - Compare: presence of courses_created vs learning_goal properties
712
- * - Filter profiles: instructor_rating exists (instructor-only property)
713
- *
714
- * EXPECTED INSIGHT: ~11% of users are instructors with teaching-specific metrics.
715
- * Instructors should show different event patterns (more feedback given, fewer
716
- * quizzes completed). Students show learning-goal-driven behavior differences.
717
- *
718
- * REAL-WORLD ANALOGUE: Two-sided marketplace profiling. Drivers vs riders in
719
- * Uber, sellers vs buyers in eBay - each persona has unique attributes and
720
- * behavioral patterns that require separate analysis.
721
- *
722
- * ───────────────────────────────────────────────────────────────────────────────
723
- * 2. DEADLINE CRAMMING
724
- * ───────────────────────────────────────────────────────────────────────────────
725
- *
726
- * PATTERN: Assignments submitted on Sundays and Mondays show deadline-rush
727
- * behavior: 60% are late (vs ~20% baseline) and quiz scores drop by 15 points.
728
- * These events carry is_deadline_rush: true.
729
- *
730
- * HOW TO FIND IT:
731
- * - Chart: assignment_submitted by day of week
732
- * - Compare: is_late rate by day of week
733
- * - Compare: quiz_completed score_percent by day of week
734
- * - Filter: is_deadline_rush = true
735
- *
736
- * EXPECTED INSIGHT: Clear quality drop on Sun/Mon. Late submission rate spikes
737
- * from ~20% to ~60%. Quiz scores taken on crunch days average 15 points lower.
738
- * This creates a visible "weekend dip" in student performance metrics.
739
- *
740
- * REAL-WORLD ANALOGUE: The "Sunday Scaries" of EdTech - students procrastinate
741
- * and cram before Monday deadlines. Identical to real patterns seen in Coursera
742
- * and university LMS data where submission quality drops near deadlines.
743
- *
744
- * ───────────────────────────────────────────────────────────────────────────────
745
- * 3. NOTES-TAKERS SUCCEED
746
- * ───────────────────────────────────────────────────────────────────────────────
747
- *
748
- * PATTERN: Students who take notes during 5 or more lecture_completed events
749
- * receive a +20 boost to all quiz scores (capped at 100), and have a 40% chance
750
- * of earning an extra certificate. Events are marked diligent_student: true.
751
- *
752
- * HOW TO FIND IT:
753
- * - Create segment: users with 5+ lecture_completed where notes_taken = true
754
- * - Compare: average quiz_completed score_percent
755
- * - Compare: certificate_earned count per user
756
- * - Filter: diligent_student = true
757
- *
758
- * EXPECTED INSIGHT: Diligent note-takers score ~20 points higher on quizzes
759
- * and earn certificates at a significantly higher rate. This is a classic
760
- * "active learning" signal visible in the data.
761
- *
762
- * REAL-WORLD ANALOGUE: Active engagement features (highlighting, bookmarking,
763
- * note-taking) that correlate with better learning outcomes. Real research
764
- * confirms note-taking improves retention by 30-40% - this hook models that.
765
- *
766
- * ───────────────────────────────────────────────────────────────────────────────
767
- * 4. STUDY GROUP RETENTION
768
- * ───────────────────────────────────────────────────────────────────────────────
769
- *
770
- * PATTERN: Students who join a study group within their first 10 days and have
771
- * passing quiz scores retain normally and receive bonus discussion events. Students
772
- * who do NOT join early AND have quiz scores below 60 experience severe churn:
773
- * 70% of their events after day 14 are removed.
774
- *
775
- * HOW TO FIND IT:
776
- * - Create cohort: users who did "study group joined" within first 10 days
777
- * - Compare: D14/D30 retention rate vs non-joiners
778
- * - Compare: total events per user after day 14
779
- * - Filter: study_group_member = true on bonus events
780
- *
781
- * EXPECTED INSIGHT: Early study group joiners show dramatically better retention
782
- * curves. Non-joiners with low quiz scores show a cliff-like drop in activity
783
- * after day 14. The combination of social isolation + poor performance predicts
784
- * churn with high accuracy.
785
- *
786
- * REAL-WORLD ANALOGUE: Social learning features that create accountability and
787
- * community. MOOCs with study groups or cohort-based programs consistently show
788
- * 3-5x higher completion rates than pure self-paced learning.
789
- *
790
- * ───────────────────────────────────────────────────────────────────────────────
791
- * 5. HINT DEPENDENCY
792
- * ───────────────────────────────────────────────────────────────────────────────
793
- *
794
- * PATTERN: In practice_problem_solved events, students who use hints have a 60%
795
- * chance of having their problem difficulty set to "easy". Students who solve
796
- * without hints have a 40% chance of tackling "hard" problems and receive
797
- * independent_solver: true.
798
- *
799
- * HOW TO FIND IT:
800
- * - Segment practice_problem_solved by: hint_used = true vs false
801
- * - Compare: difficulty distribution (easy vs medium vs hard)
802
- * - Filter: independent_solver = true
803
- * - Compare: average time_to_solve_sec by hint usage
804
- *
805
- * EXPECTED INSIGHT: Hint users cluster on easy problems; non-hint users tackle
806
- * harder problems. This creates a visible "hint dependency" where the scaffolding
807
- * intended to help students actually limits their growth trajectory.
808
- *
809
- * REAL-WORLD ANALOGUE: The "training wheels" problem in education technology.
810
- * Hints, auto-complete, and guided solutions can create dependency rather than
811
- * building genuine competence. Real platforms like LeetCode and HackerRank
812
- * observe this pattern.
813
- *
814
- * ───────────────────────────────────────────────────────────────────────────────
815
- * 6. SEMESTER-END SPIKE
816
- * ───────────────────────────────────────────────────────────────────────────────
817
- *
818
- * PATTERN: During days 75-85 of the dataset, quiz_started, quiz_completed, and
819
- * assignment_submitted events have a 50% chance of being duplicated (with slightly
820
- * offset timestamps). All events in this window carry semester_end_rush: true.
821
- *
822
- * HOW TO FIND IT:
823
- * - Chart: quiz_started, quiz_completed, assignment_submitted counts by day
824
- * - Look for: clear volume spike during days 75-85
825
- * - Filter: semester_end_rush = true
826
- * - Compare: event volume in days 75-85 vs days 60-75 (baseline)
827
- *
828
- * EXPECTED INSIGHT: Assessment activity rises to roughly 1.5x baseline during
829
- * the "finals" period (a 50% duplication chance adds about half as many events
830
- * again). This creates a visible time-series spike that mirrors real academic calendar patterns.
831
- *
832
- * REAL-WORLD ANALOGUE: End-of-semester, end-of-quarter, or end-of-trial
833
- * behavior spikes. Every EdTech platform sees massive activity surges before
834
- * deadlines, certification exams, or subscription renewal dates.
835
- *
836
- * ───────────────────────────────────────────────────────────────────────────────
837
- * 7. FREE VS PAID COURSES
838
- * ───────────────────────────────────────────────────────────────────────────────
839
- *
840
- * PATTERN: The Course Completion funnel conversion rate is modified by the user's
841
- * subscription_status. Free users convert at 0.6x the base rate; monthly and
842
- * annual subscribers convert at 1.3x. This creates a ~2.2x difference between
843
- * free and paid users in course completion.
844
- *
845
- * HOW TO FIND IT:
846
- * - Segment the Course Completion funnel by: subscription_status
847
- * - Compare: funnel conversion rates for free vs monthly vs annual
848
- * - Compare: certificate_earned counts by subscription_status
849
- *
850
- * EXPECTED INSIGHT: Paid subscribers are roughly 2x more likely to complete
851
- * courses end-to-end. Free users drop off heavily between quiz_completed and
852
- * certificate_earned. This mirrors the "skin in the game" effect.
853
- *
854
- * REAL-WORLD ANALOGUE: The well-documented correlation between payment and
855
- * completion in online education. Paid Coursera learners complete courses at
856
- * 5-10x the rate of free audit-track learners. Financial commitment creates
857
- * psychological commitment.
858
- *
859
- * ───────────────────────────────────────────────────────────────────────────────
860
- * 8. PLAYBACK SPEED CORRELATION
861
- * ───────────────────────────────────────────────────────────────────────────────
862
- *
863
- * PATTERN: In lecture_completed events, playback speed creates two distinct
864
- * learner segments:
865
- * - Speed learners (>= 1.5x): get speed_learner: true, compressed watch_time
866
- * (0.6x), and paradoxically HIGHER quiz scores (+8 points)
867
- * - Thorough learners (<= 1.0x): get thorough_learner: true, extended watch_time
868
- * (1.4x)
869
- *
870
- * HOW TO FIND IT:
871
- * - Segment lecture_completed by: playback_speed
872
- * - Compare: average watch_time_mins by speed bucket
873
- * - Compare: subsequent quiz_completed score_percent
874
- * - Filter: speed_learner = true or thorough_learner = true
875
- * - Correlate: playback_speed with quiz performance
876
- *
877
- * EXPECTED INSIGHT: Counter-intuitively, speed learners score slightly higher
878
- * on quizzes despite watching lectures faster. This suggests that playback speed
879
- * is a proxy for prior knowledge or aptitude, not laziness.
880
- *
881
- * REAL-WORLD ANALOGUE: Research on lecture playback speed consistently shows
882
- * that students who watch at 1.5-2x speed perform equally or better on assessments.
883
- * Speed selection correlates with confidence and familiarity with the material,
884
- * not with learning quality.
885
- *
886
- * ═══════════════════════════════════════════════════════════════════════════════
887
- * ADVANCED ANALYSIS IDEAS
888
- * ═══════════════════════════════════════════════════════════════════════════════
889
- *
890
- * CROSS-HOOK PATTERNS:
891
- *
892
- * 1. The Ideal Student: Users who:
893
- * - Take notes consistently (Hook #3)
894
- * - Join study groups early (Hook #4)
895
- * - Solve problems without hints (Hook #5)
896
- * - Have paid subscriptions (Hook #7)
897
- * - Watch lectures at higher speed (Hook #8)
898
- * These students should have exceptional completion rates and quiz scores.
899
- *
900
- * 2. The Cramming Cascade: Do deadline crammers (Hook #2) also show up
901
- * in the semester-end spike (Hook #6)? Is the quality drop compounded?
902
- *
903
- * 3. Social Safety Net: Does early study group joining (Hook #4) prevent
904
- * churn even for students who struggle on quizzes?
905
- *
906
- * 4. Hint-to-Mastery Pipeline: Do hint-dependent students (Hook #5) who
907
- * later join study groups (Hook #4) eventually wean off hints?
908
- *
909
- * 5. Payment + Notes: Are paid subscribers (Hook #7) more likely to take
910
- * notes (Hook #3)? Does the combination create a super-performer segment?
911
- *
912
- * COHORT ANALYSIS:
913
- *
914
- * - Cohort by education level: Do PhD students and self-taught learners
915
- * show different hook patterns?
916
- * - Cohort by learning style: Do visual vs hands-on learners take more notes?
917
- * - Cohort by platform: Do mobile (iOS/Android) users have different playback
918
- * speed preferences than Web/iPad users?
919
- * - Cohort by course category: Do CS students use hints more than Arts students?
920
- *
921
- * FUNNEL ANALYSIS:
922
- *
923
- * - Onboarding Funnel: How does account_type affect the register -> enroll ->
924
- * first lecture conversion?
925
- * - Course Completion Funnel: Compare by subscription_status, note-taking
926
- * behavior, and study group membership
927
- * - Practice Mastery Funnel: Compare by hint usage, playback speed, and
928
- * learning style
929
- *
930
- * ═══════════════════════════════════════════════════════════════════════════════
931
- * EXPECTED METRICS SUMMARY
932
- * ═══════════════════════════════════════════════════════════════════════════════
933
- *
934
- * Hook | Metric | Baseline | Hook Effect | Ratio
935
- * ────────────────────────|───────────────────────|──────────|──────────────|──────
936
- * Student vs Instructor | Profile attributes | generic | role-specific| N/A
937
- * Deadline Cramming | Late submission rate | ~20% | ~60% | 3x
938
- * Deadline Cramming | Quiz score (Sun/Mon) | ~65 | ~50 | -15pt
939
- * Notes-Takers Succeed | Quiz score | ~65 | ~85 | +20pt
940
- * Notes-Takers Succeed | Certificate rate | baseline | +40% | 1.4x
941
- * Study Group Retention | D14 retention | ~40% | ~90% | 2.3x
942
- * Study Group Retention | Post-D14 events | 100% | 30% (churn) | 0.3x
943
- * Hint Dependency | Easy problem rate | ~33% | ~60% | 1.8x
944
- * Hint Dependency | Hard problem rate | ~33% | ~40% (no hint)| 1.2x
945
- * Semester-End Spike | Assessment volume | baseline | ~1.5x | 1.5x
946
- * Free vs Paid | Course completion | 15% | 33% | 2.2x
947
- * Playback Speed | Quiz score (speed) | ~65 | ~73 | +8pt
948
- *
949
- * ═══════════════════════════════════════════════════════════════════════════════
950
- * HOW TO RUN THIS DUNGEON
951
- * ═══════════════════════════════════════════════════════════════════════════════
952
- *
953
- * From the dm4 root directory:
954
- *
955
- * npm start
956
- *
957
- * Or programmatically:
958
- *
959
- * import generate from './index.js';
960
- * import config from './dungeons/education.js';
961
- * const results = await generate(config);
962
- *
963
- * OUTPUT FILES (with writeToDisk: false, format: "json", gzip: true):
964
- *
965
- * - needle-haystack-education__events.json.gz - All event data
966
- * - needle-haystack-education__user_profiles.json.gz - User profiles
967
- * - needle-haystack-education__group_profiles.json.gz - Course & study group profiles
968
- * - needle-haystack-education__course_id_lookup.json.gz - Course catalog
969
- * - needle-haystack-education__quiz_id_lookup.json.gz - Quiz catalog
970
- *
971
- * ═══════════════════════════════════════════════════════════════════════════════
972
- * TESTING YOUR ANALYTICS PLATFORM
973
- * ═══════════════════════════════════════════════════════════════════════════════
974
- *
975
- * This dungeon is perfect for testing:
976
- *
977
- * 1. Segmentation: Can you separate instructor vs student behavior patterns?
978
- * 2. Temporal Analysis: Can you detect the deadline cramming and semester-end spike?
979
- * 3. Behavioral Correlation: Can you discover the note-taking success pattern?
980
- * 4. Retention Analysis: Can you identify the study group retention effect?
981
- * 5. Feature Impact: Can you measure hint dependency on problem difficulty?
982
- * 6. Anomaly Detection: Can you automatically detect the semester-end volume spike?
983
- * 7. Funnel Analysis: Can you quantify the free vs paid completion gap?
984
- * 8. Counter-intuitive Insight: Can you find the speed learner paradox?
985
- *
986
- * ═══════════════════════════════════════════════════════════════════════════════
987
- * WHY "NEEDLE IN A HAYSTACK"?
988
- * ═══════════════════════════════════════════════════════════════════════════════
989
- *
990
- * Each hook is a "needle" - a meaningful, actionable insight hidden in a
991
- * "haystack" of 360K events. The challenge is:
992
- *
993
- * 1. FINDING the needles (discovery)
994
- * 2. VALIDATING they are real patterns (statistical significance)
995
- * 3. UNDERSTANDING why they matter (educational impact)
996
- * 4. ACTING on them (platform improvements)
997
- *
998
- * This mirrors real-world EdTech analytics: your data contains valuable insights
999
- * about student success, but you need the right tools and skills to find them.
1000
- *
1001
- * Happy Learning!
1002
- *
1003
- * ═══════════════════════════════════════════════════════════════════════════════
1004
- */