npm - make-mp-data - Versions diffs - 3.0.3 → 3.0.5 - Mend

make-mp-data 3.0.3 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

package/README.md +46 -0
package/dungeons/array-of-object-lookup-schema.json +327 -0
package/dungeons/array-of-object-lookup.js +29 -9
package/dungeons/capstone/capstone-ic3.js +291 -0
package/dungeons/capstone/capstone-ic4.js +598 -0
package/dungeons/capstone/capstone-ic5.js +668 -0
package/dungeons/capstone/generate-product-lookup.js +309 -0
package/dungeons/ecommerce-schema.json +462 -0
package/dungeons/{copilot.js → ecommerce.js} +79 -17
package/dungeons/education-schema.json +2409 -0
package/dungeons/education.js +226 -462
package/dungeons/fintech-schema.json +14034 -0
package/dungeons/fintech.js +134 -413
package/dungeons/foobar-schema.json +403 -0
package/dungeons/foobar.js +27 -4
package/dungeons/food-delivery-schema.json +192 -0
package/dungeons/food-delivery.js +602 -0
package/dungeons/food-schema.json +1152 -0
package/dungeons/food.js +173 -406
package/dungeons/gaming-schema.json +1270 -0
package/dungeons/gaming.js +182 -42
package/dungeons/insurance-application-schema.json +204 -0
package/dungeons/insurance-application.js +605 -0
package/dungeons/media-schema.json +906 -0
package/dungeons/media.js +250 -420
package/dungeons/retention-cadence-schema.json +78 -0
package/dungeons/retention-cadence.js +35 -1
package/dungeons/rpg-schema.json +4526 -0
package/dungeons/rpg.js +171 -429
package/dungeons/sanity-schema.json +255 -0
package/dungeons/sanity.js +21 -10
package/dungeons/sass-schema.json +1291 -0
package/dungeons/sass.js +241 -368
package/dungeons/scd-schema.json +919 -0
package/dungeons/scd.js +41 -13
package/dungeons/simple-schema.json +608 -0
package/dungeons/simple.js +52 -15
package/dungeons/simplest-schema.json +1418 -0
package/dungeons/simplest.js +392 -0
package/dungeons/social-schema.json +1118 -0
package/dungeons/social.js +150 -391
package/dungeons/text-generation-schema.json +3096 -0
package/dungeons/text-generation.js +71 -0
package/index.js +8 -6
package/lib/core/config-validator.js +28 -8
package/lib/core/storage.js +5 -5
package/lib/generators/events.js +4 -4
package/lib/orchestrators/mixpanel-sender.js +16 -13
package/lib/orchestrators/user-loop.js +14 -6
package/lib/templates/soup-presets.js +188 -0
package/lib/utils/utils.js +52 -6
package/package.json +1 -1
package/types.d.ts +20 -3
package/dungeons/adspend.js +0 -130
package/dungeons/anon.js +0 -128
package/dungeons/benchmark-heavy.js +0 -240
package/dungeons/benchmark-light.js +0 -140
package/dungeons/big.js +0 -226
package/dungeons/business.js +0 -391
package/dungeons/complex.js +0 -428
package/dungeons/experiments.js +0 -137
package/dungeons/funnels.js +0 -309
package/dungeons/mil.js +0 -323
package/dungeons/mirror.js +0 -161
package/dungeons/soup-test.js +0 -52
package/dungeons/streaming.js +0 -372
package/dungeons/strict-event-test.js +0 -30
package/dungeons/student-teacher.js +0 -438
package/dungeons/too-big-events.js +0 -203
package/dungeons/user-agent.js +0 -209

package/dungeons/education.js CHANGED Viewed

@@ -10,79 +10,167 @@ const chance = u.initChance(SEED);
 const num_users = 5_000;
 const days = 100;
-/** @typedef  {import("../../types.js").Dungeon} Config */
+/** @typedef  {import("../types.d.ts").Dungeon} Config */
 /**
- * NEEDLE IN A HAYSTACK - ELEARNING APP DESIGN
+ * ═══════════════════════════════════════════════════════════════════════════════
+ * DATASET OVERVIEW — LearnPath eLearning Platform
+ * ═══════════════════════════════════════════════════════════════════════════════
  *
- * LearnPath - An online learning platform modeled after Coursera, Khan Academy, and Udemy.
- * The platform supports both self-paced and cohort-based learning, with a robust ecosystem
- * of courses, quizzes, assignments, and social study features.
+ * An online learning platform modeled after Coursera, Khan Academy, and Udemy.
+ * Supports self-paced and cohort-based learning with courses, quizzes,
+ * assignments, and social study features.
+ *
+ * Scale: 5,000 users / 600K events / 100 days / 17 event types
+ *
+ * CORE LOOP:
+ * Register → browse/enroll in courses → watch lectures → practice problems →
+ * quizzes/assignments → certificate earned. Social layer (study groups,
+ * discussions) drives retention. Subscription tiers (free/monthly/annual)
+ * gate completion rates.
+ *
+ * FUNNELS:
+ * - Onboarding: account registered → course enrolled → lecture started
+ * - Learning loop: lecture started → lecture completed → practice problem solved
+ * - Assessment: quiz started → quiz completed → assignment submitted
+ * - Course completion: course enrolled → lecture completed → quiz completed → certificate earned
+ * - Social learning: discussion posted → study group joined → resource downloaded
+ * - Instructor interaction: assignment submitted → assignment graded → instructor feedback given
+ * - Support/monetization: help requested → subscription purchased → course reviewed
+ *
+ * GROUPS: course_id (150 courses), group_id (300 study groups)
+ * SUBSCRIPTIONS: free (~60%), monthly, annual
+ * ACCOUNT TYPES: ~89% students, ~11% instructors (two-sided marketplace)
+ */
+/**
+ * ═══════════════════════════════════════════════════════════════════════════════
+ * ANALYTICS HOOKS (8 architected patterns)
+ * ═══════════════════════════════════════════════════════════════════════════════
  *
- * CORE LEARNING LOOP:
- * Students register accounts, browse and enroll in courses across multiple categories
- * (CS, Math, Science, Business, Arts, Languages), then progress through structured
- * modules consisting of lectures, practice problems, quizzes, and assignments. Successful
- * completion of all requirements earns a certificate. The platform emphasizes active
- * learning through note-taking, practice problems, and peer discussion.
+ * 1. STUDENT VS INSTRUCTOR PROFILES
+ *    Instructor profiles get teaching attributes (courses_created,
+ *    teaching_experience_years, instructor_rating). Students get learning
+ *    attributes (learning_goal, study_hours_per_week).
  *
- * COURSE SYSTEM (events: course enrolled -> lecture started -> lecture completed):
- * - Six course categories spanning technical and creative disciplines
- * - Three difficulty tiers: beginner, intermediate, advanced
- * - Free and paid course options (drives subscription analytics)
- * - 150 unique courses with varying lengths, ratings, and enrollment counts
- * - Modules (1-12 per course) contain lectures, quizzes, and assignments
+ *    Mixpanel reports:
+ *    • Insights → Any event → Unique users → Breakdown: "account_type"
+ *      Expected: ~89% students, ~11% instructors
+ *    • Insights → "instructor feedback given" → Total per user → Breakdown: "account_type"
+ *      Expected: Instructors dominate feedback; students show learning_goal instead
  *
- * LECTURE EXPERIENCE (events: lecture started -> lecture completed):
- * - Variable lecture durations (5-60 minutes) reflecting real MOOC patterns
- * - Playback speed options (0.75x to 2.0x) reveal learning style differences
- * - Note-taking tracking creates a behavioral signal for student diligence
- * - Watch time vs. lecture duration measures actual engagement
+ * 2. DEADLINE CRAMMING
+ *    Assignments submitted on Sun/Mon are rushed: 60% late (vs ~20% baseline),
+ *    quiz scores drop by 25 points. Events carry is_deadline_rush: true.
  *
- * ASSESSMENT SYSTEM (events: quiz started -> quiz completed, assignment submitted -> graded):
- * - Practice quizzes (low stakes, unlimited attempts) vs. graded quizzes vs. final exams
- * - Assignments support text, code, file upload, and project submissions
- * - Grading by instructors, peers, or auto-grader (reflects real platform patterns)
- * - Score tracking enables learning outcome analytics
+ *    Mixpanel reports:
+ *    • Insights → "assignment submitted" → Total → Breakdown: "is_deadline_rush"
+ *      Expected: is_deadline_rush=true shows ~60% late rate vs ~20% baseline
+ *    • Insights → "quiz completed" → Avg "score_percent" → Breakdown: Day of Week
+ *      Expected: Sun/Mon scores ~25 points lower (~40 vs ~65)
  *
- * PRACTICE PROBLEMS (event: practice problem solved):
- * - Difficulty-tiered problems (easy, medium, hard) for skill building
- * - Hint system creates a measurable dependency pattern
- * - Time-to-solve metrics reveal mastery progression
- * - High volume (weight: 12) reflects real platform usage patterns
+ * 3. NOTES-TAKERS SUCCEED
+ *    Students with 5+ notes_taken=true lectures get +20 quiz score boost
+ *    (capped at 100) and 40% chance of bonus certificate. Marked diligent_student: true.
  *
- * SOCIAL LEARNING (events: discussion posted, study group joined):
- * - Discussion forums with questions, answers, and comments
- * - Study groups (study circles, project teams, tutoring groups)
- * - Social features drive retention (a key hook pattern)
+ *    Mixpanel reports:
+ *    • Insights → "quiz completed" → Avg "score_percent" → Breakdown: "diligent_student"
+ *      Expected: diligent_student=true ≈ 85 avg vs ~65 baseline (+20 pts)
+ *    • Insights → "certificate earned" → Total per user → Breakdown: "diligent_student"
+ *      Expected: diligent_student=true earn ~40% more certificates
  *
- * INSTRUCTOR ECOSYSTEM (events: instructor feedback given, assignment graded):
- * - Instructors create courses, grade assignments, and provide feedback
- * - Written, video, and rubric-based feedback types
- * - Response time tracking (1-72 hours) measures instructor engagement
+ * 4. STUDY GROUP RETENTION
+ *    Early study group joiners (within 10 days) retain and get bonus discussions.
+ *    Non-joiners with low quiz scores (<60) churn hard at day 14 (all later events removed).
  *
- * MONETIZATION (event: subscription purchased):
- * - Three tiers: monthly ($19.99), annual ($149.99), lifetime ($499.99)
- * - Free tier with limited access (most users)
- * - Subscription status affects course completion funnels (Hook #7)
+ *    Mixpanel reports:
+ *    • Retention → A: "account registered" → B: Any event → Segment by early study group join
+ *      Expected: Early joiners ~90% D14 retention; non-joiners with low scores ~0% (hard churn)
+ *    • Insights → "discussion posted" → Total per user → Breakdown: "study_group_member"
+ *      Expected: study_group_member=true users post more
  *
- * SUPPORT SYSTEM (event: help requested):
- * - Four topic categories: technical, content, billing, accessibility
- * - Three channels: chat, email, forum
- * - Tracks student friction points
+ * 5. HINT DEPENDENCY
+ *    Hint users get 60% chance of easy problems; non-hint users get 40% chance of
+ *    hard problems with independent_solver: true.
+ *
+ *    Mixpanel reports:
+ *    • Insights → "practice problem solved" → Total → Breakdown: "difficulty" → Filter: hint_used=true
+ *      Expected: ~60% easy (vs ~33% baseline)
+ *    • Insights → "practice problem solved" → Total → Breakdown: "difficulty" → Filter: hint_used=false
+ *      Expected: ~40% hard (vs ~33% baseline)
+ *
+ * 6. SEMESTER-END SPIKE
+ *    Days 75-85: "quiz started", "quiz completed", and "assignment submitted" events
+ *    are duplicated at an 80% rate. Events carry semester_end_rush: true.
  *
- * COURSE REVIEWS (event: course reviewed):
- * - 1-5 star ratings with written reviews
- * - Would-recommend boolean for NPS-style analysis
- * - Review length correlates with sentiment strength
+ *    Mixpanel reports:
+ *    • Insights (line) → "quiz started" + "quiz completed" + "assignment submitted" → Daily
+ *      Expected: ~2x volume spike during days 75-85
+ *    • Insights → "quiz completed" → Total → Breakdown: "semester_end_rush"
+ *      Expected: semester_end_rush=true clusters in days 75-85
+ *
+ * 7. FREE VS PAID SUBSCRIBERS
+ *    Free users get 0.5x funnel conversion rate; paid subscribers get 1.5x.
+ *    Free users also lose 55% of certificates. Creates ~2.2x completion gap.
+ *
+ *    Mixpanel reports:
+ *    • Funnels → "course enrolled" → "lecture completed" → "quiz completed" → "certificate earned"
+ *      Breakdown: "subscription_status"
+ *      Expected: free ≈ 15% completion, paid ≈ 33% (~2.2x difference)
+ *    • Insights → "certificate earned" → Total per user → Breakdown: "subscription_status"
+ *      Expected: Paid subscribers earn significantly more certificates
+ *
+ * 8. PLAYBACK SPEED CORRELATION
+ *    Speed learners (>=2.0x, 3+ lectures): compressed watch_time (0.6x),
+ *    paradoxically higher quiz scores (+8 pts). Thorough learners (<=1.0x):
+ *    extended watch_time (1.4x).
+ *
+ *    Mixpanel reports:
+ *    • Insights → "lecture completed" → Avg "watch_time_mins" → Breakdown: "speed_learner"
+ *      Expected: speed_learner=true ≈ 0.6x watch time
+ *    • Insights → "quiz completed" → Avg "score_percent" → Breakdown: "speed_learner_effect"
+ *      Expected: speed_learner_effect=true shows +8 points (faster = better)
+ *
+ * ═══════════════════════════════════════════════════════════════════════════════
+ * ADVANCED ANALYSIS IDEAS
+ * ═══════════════════════════════════════════════════════════════════════════════
+ *
+ * CROSS-HOOK PATTERNS:
+ * - The Ideal Student: notes (H3) + study groups (H4) + no hints (H5) + paid (H7) + speed (H8)
+ * - Cramming Cascade: deadline crammers (H2) compounded with semester-end spike (H6)?
+ * - Social Safety Net: does early study group joining (H4) prevent churn for low scorers?
+ * - Hint-to-Mastery: do hint-dependent (H5) students who join groups (H4) wean off hints?
+ * - Payment + Notes: are paid subscribers (H7) more likely to take notes (H3)?
+ *
+ * COHORT ANALYSIS:
+ * - By education level: PhD vs self-taught hook patterns
+ * - By learning style: visual vs hands-on note-taking rates
+ * - By platform: mobile vs desktop playback speed preferences
+ * - By course category: CS vs Arts hint usage
  *
- * WHY THESE EVENTS/PROPERTIES?
- * - Events model the complete student lifecycle: onboarding -> engagement -> mastery -> certification
- * - Properties enable cohort analysis: learning style, education level, account type, subscription status
- * - Funnels reveal friction: where do students drop off in onboarding, course completion, practice mastery?
- * - Behavioral signals (notes, hints, playback speed, study groups) create discoverable skill gaps
- * - Social features (study groups, discussions) and monetization (subscriptions) drive business metrics
- * - The "needle in haystack" hooks simulate real EdTech insights hidden in production data
+ * FUNNEL ANALYSIS:
+ * - Onboarding by account_type
+ * - Course completion by subscription, notes, study groups
+ * - Practice mastery by hint usage, speed, learning style
+ *
+ * ═══════════════════════════════════════════════════════════════════════════════
+ * EXPECTED METRICS SUMMARY
+ * ═══════════════════════════════════════════════════════════════════════════════
+ *
+ * Hook                    | Metric                | Baseline | Hook Effect  | Ratio
+ * ────────────────────────|───────────────────────|──────────|──────────────|──────
+ * Student vs Instructor   | Profile attributes    | generic  | role-specific| N/A
+ * Deadline Cramming       | Late submission rate  | ~20%     | ~60%         | 3x
+ * Deadline Cramming       | Quiz score (Sun/Mon)  | ~65      | ~40          | -25pt
+ * Notes-Takers Succeed    | Quiz score            | ~65      | ~85          | +20pt
+ * Notes-Takers Succeed    | Certificate rate      | baseline | +40%         | 1.4x
+ * Study Group Retention   | D14 retention         | ~40%     | ~90%         | 2.3x
+ * Study Group Retention   | Post-D14 events       | 100%     | 0% (churn)   | 0x
+ * Hint Dependency         | Easy problem rate     | ~33%     | ~60%         | 1.8x
+ * Hint Dependency         | Hard problem rate     | ~33%     | ~40% (no hint)| 1.2x
+ * Semester-End Spike      | Assessment volume     | baseline | ~2x          | 2x
+ * Free vs Paid            | Course completion     | 15%      | 33%          | 2.2x
+ * Playback Speed          | Quiz score (speed)    | ~65      | ~73          | +8pt
  */
 // Generate consistent IDs for lookup tables and event properties
@@ -177,7 +265,7 @@ const config = {
 			isFirstEvent: true,
 			properties: {
 				"account_type": u.pickAWinner(["student", "instructor"], 0.15),
-				"signup_source": u.pickAWinner(["organic", "referral", "school_partnership", "social_ad"]),
+				"signup_source": ["organic", "referral", "school_partnership", "social_ad"],
 			}
 		},
 		{
@@ -185,8 +273,8 @@ const config = {
 			weight: 8,
 			properties: {
 				"course_id": u.pickAWinner(courseIds),
-				"course_category": u.pickAWinner(["CS", "Math", "Science", "Business", "Arts", "Languages"]),
-				"difficulty": u.pickAWinner(["beginner", "intermediate", "advanced"]),
+				"course_category": ["CS", "Math", "Science", "Business", "Arts", "Languages"],
+				"difficulty": ["beginner", "intermediate", "advanced"],
 				"is_free": u.pickAWinner([true, false], 0.4),
 			}
 		},
@@ -217,7 +305,7 @@ const config = {
 			properties: {
 				"course_id": u.pickAWinner(courseIds),
 				"quiz_id": u.pickAWinner(quizIds),
-				"quiz_type": u.pickAWinner(["practice", "graded", "final_exam"]),
+				"quiz_type": ["practice", "graded", "final_exam"],
 				"question_count": u.weighNumRange(5, 50, 0.7, 15),
 			}
 		},
@@ -238,7 +326,7 @@ const config = {
 			properties: {
 				"course_id": u.pickAWinner(courseIds),
 				"assignment_id": u.pickAWinner(assignmentIds),
-				"submission_type": u.pickAWinner(["text", "code", "file", "project"]),
+				"submission_type": ["text", "code", "file", "project"],
 				"word_count": u.weighNumRange(100, 5000, 0.6, 500),
 				"is_late": u.pickAWinner([true, false], 0.2),
 			}
@@ -249,9 +337,9 @@ const config = {
 			properties: {
 				"course_id": u.pickAWinner(courseIds),
 				"assignment_id": u.pickAWinner(assignmentIds),
-				"grade": u.pickAWinner(["A", "B", "C", "D", "F"]),
+				"grade": ["A", "B", "C", "D", "F"],
 				"feedback_length": u.weighNumRange(0, 500, 0.5, 100),
-				"grader": u.pickAWinner(["instructor", "peer", "auto"]),
+				"grader": ["instructor", "peer", "auto"],
 			}
 		},
 		{
@@ -259,7 +347,7 @@ const config = {
 			weight: 7,
 			properties: {
 				"course_id": u.pickAWinner(courseIds),
-				"post_type": u.pickAWinner(["question", "answer", "comment"]),
+				"post_type": ["question", "answer", "comment"],
 				"word_count": u.weighNumRange(10, 500, 0.6, 80),
 			}
 		},
@@ -278,14 +366,14 @@ const config = {
 			properties: {
 				"group_id": u.pickAWinner(groupIds),
 				"group_size": u.weighNumRange(3, 20, 0.7, 8),
-				"group_type": u.pickAWinner(["study_circle", "project_team", "tutoring"]),
+				"group_type": ["study_circle", "project_team", "tutoring"],
 			}
 		},
 		{
 			event: "resource downloaded",
 			weight: 9,
 			properties: {
-				"resource_type": u.pickAWinner(["pdf", "slides", "code_sample", "dataset", "cheat_sheet"]),
+				"resource_type": ["pdf", "slides", "code_sample", "dataset", "cheat_sheet"],
 				"course_id": u.pickAWinner(courseIds),
 			}
 		},
@@ -294,7 +382,7 @@ const config = {
 			weight: 3,
 			properties: {
 				"course_id": u.pickAWinner(courseIds),
-				"feedback_type": u.pickAWinner(["written", "video", "rubric"]),
+				"feedback_type": ["written", "video", "rubric"],
 				"response_time_hours": u.weighNumRange(1, 72, 0.5, 15),
 			}
 		},
@@ -312,7 +400,7 @@ const config = {
 			event: "subscription purchased",
 			weight: 2,
 			properties: {
-				"plan": u.pickAWinner(["monthly", "annual", "lifetime"]),
+				"plan": ["monthly", "annual", "lifetime"],
 				"price": u.pickAWinner([19.99, 149.99, 499.99]),
 			}
 		},
@@ -320,8 +408,8 @@ const config = {
 			event: "help requested",
 			weight: 4,
 			properties: {
-				"topic": u.pickAWinner(["technical", "content", "billing", "accessibility"]),
-				"channel": u.pickAWinner(["chat", "email", "forum"]),
+				"topic": ["technical", "content", "billing", "accessibility"],
+				"channel": ["chat", "email", "forum"],
 			}
 		},
 		{
@@ -330,7 +418,7 @@ const config = {
 			properties: {
 				"course_id": u.pickAWinner(courseIds),
 				"problem_id": u.pickAWinner(problemIds),
-				"difficulty": u.pickAWinner(["easy", "medium", "hard"]),
+				"difficulty": ["easy", "medium", "hard"],
 				"time_to_solve_sec": u.weighNumRange(10, 3600, 0.5, 300),
 				"hint_used": u.pickAWinner([true, false], 0.35),
 			}
@@ -338,17 +426,17 @@ const config = {
 	],
 	superProps: {
-		platform: u.pickAWinner(["Web", "iOS", "Android", "iPad"]),
-		subscription_status: u.pickAWinner(["free", "free", "free", "monthly", "annual"]),
+		platform: ["Web", "iOS", "Android", "iPad"],
 	},
 	scdProps: {},
 	userProps: {
 		"account_type": u.pickAWinner(["student", "student", "student", "student", "student", "student", "student", "student", "instructor"]),
-		"learning_style": u.pickAWinner(["visual", "reading", "hands_on", "auditory"]),
-		"education_level": u.pickAWinner(["high_school", "bachelors", "masters", "phd", "self_taught"]),
-		"timezone": u.pickAWinner(["US_Eastern", "US_Pacific", "US_Central", "Europe", "Asia"]),
+		"subscription_status": u.pickAWinner(["free", "free", "free", "monthly", "annual"]),
+		"learning_style": ["visual", "reading", "hands_on", "auditory"],
+		"education_level": ["high_school", "bachelors", "masters", "phd", "self_taught"],
+		"timezone": ["US_Eastern", "US_Pacific", "US_Central", "Europe", "Asia"],
 	},
 	groupKeys: [
@@ -366,7 +454,7 @@ const config = {
 		group_id: {
 			"name": () => `${chance.pickone(["Study", "Learning", "Focus", "Peer", "Cohort"])} ${chance.pickone(["Circle", "Squad", "Team", "Hub", "Group"])} ${chance.character({ alpha: true, casing: "upper" })}${chance.integer({ min: 1, max: 99 })}`,
 			"member_count": u.weighNumRange(3, 20, 0.7, 8),
-			"focus_area": u.pickAWinner(["CS", "Math", "Science", "Business", "Arts", "Languages"]),
+			"focus_area": ["CS", "Math", "Science", "Business", "Arts", "Languages"],
 		}
 	},
@@ -420,14 +508,9 @@ const config = {
 				}
 			}
-			if (record.event === "quiz completed" && record.time) {
-				const eventDay = dayjs(record.time).day();
-				if (eventDay === 0 || eventDay === 1) {
-					if (record.score_percent !== undefined) {
-						record.score_percent = Math.max(0, record.score_percent - 15);
-					}
-				}
-			}
+			// Quiz score penalty moved to everything hook (after churn removal)
+			// to avoid selection bias — the penalty was causing more Sun/Mon
+			// quiz-takers to trigger hasLowQuizScore churn, inflating their avg
 		}
 		// ═══════════════════════════════════════════════════════════════════
@@ -481,7 +564,7 @@ const config = {
 			if (record.event === "lecture completed") {
 				const speed = record.playback_speed;
-				if (speed >= 1.5) {
+				if (speed >= 2.0) {
 					record.speed_learner = true;
 					record.thorough_learner = false;
 					// Compress watch time for speed learners
@@ -572,42 +655,14 @@ const config = {
 				}
 			}
-			// Hook #4: STUDY GROUP RETENTION
-			if (!joinedStudyGroupEarly && hasLowQuizScore) {
-				// Non-joiners with low scores: remove 70% of events after day 14 (churn)
-				const churnCutoff = firstEventTime ? firstEventTime.add(14, 'days') : null;
-				for (let i = userEvents.length - 1; i >= 0; i--) {
-					const evt = userEvents[i];
-					if (churnCutoff && dayjs(evt.time).isAfter(churnCutoff)) {
-						if (chance.bool({ likelihood: 70 })) {
-							userEvents.splice(i, 1);
-						}
-					}
-				}
-			} else if (joinedStudyGroupEarly) {
-				// Study group joiners keep all events and get bonus discussion_posted events
-				const lastEvent = userEvents[userEvents.length - 1];
-				if (lastEvent && chance.bool({ likelihood: 60 })) {
-					const bonusDiscussion = {
-						event: "discussion posted",
-						time: dayjs(lastEvent.time).add(chance.integer({ min: 1, max: 3 }), 'days').toISOString(),
-						user_id: lastEvent.user_id,
-						course_id: chance.pickone(courseIds),
-						post_type: chance.pickone(["question", "answer", "comment"]),
-						word_count: chance.integer({ min: 20, max: 400 }),
-						study_group_member: true,
-					};
-					userEvents.push(bonusDiscussion);
-				}
-			}
-			// Hook #8 (everything pass): Speed learners get slightly HIGHER quiz scores
-			let isSpeedLearner = false;
+			// Hook #8 (everything pass): Speed learners (3+ lectures at 2.0x) get higher quiz scores
+			let speedLectureCount = 0;
 			userEvents.forEach((event) => {
 				if (event.event === "lecture completed" && event.speed_learner === true) {
-					isSpeedLearner = true;
+					speedLectureCount++;
 				}
 			});
+			const isSpeedLearner = speedLectureCount >= 3;
 			if (isSpeedLearner) {
 				userEvents.forEach((event) => {
@@ -623,7 +678,7 @@ const config = {
 			// Hook #6: SEMESTER-END SPIKE - duplicate assessment events in the spike window
 			const duplicates = [];
 			userEvents.forEach((event) => {
-				if (event.semester_end_rush === true && chance.bool({ likelihood: 50 })) {
+				if (event.semester_end_rush === true && chance.bool({ likelihood: 80 })) {
 					const dup = JSON.parse(JSON.stringify(event));
 					dup.time = dayjs(event.time).add(chance.integer({ min: 5, max: 120 }), 'minutes').toISOString();
 					dup.semester_end_rush = true;
@@ -635,15 +690,57 @@ const config = {
 			}
 			// Hook #7: FREE VS PAID - reinforce the subscription effect on certificates
-			const subStatus = userEvents.length > 0 ? userEvents[0].subscription_status : "free";
+			const subStatus = meta && meta.profile ? meta.profile.subscription_status : "free";
 			if (subStatus === "free") {
-				// Free users lose 40% of their certificates (simulating lower completion)
+				// Free users lose 55% of their certificates (simulating lower completion)
 				for (let i = userEvents.length - 1; i >= 0; i--) {
-					if (userEvents[i].event === "certificate earned" && chance.bool({ likelihood: 40 })) {
+					if (userEvents[i].event === "certificate earned" && chance.bool({ likelihood: 55 })) {
 						userEvents.splice(i, 1);
 					}
 				}
 			}
+			// Hook #4: STUDY GROUP RETENTION (runs after the event-duplicating and certificate-removing hooks so churn removal isn't undone; only the Hook #2b score penalty follows)
+			if (!joinedStudyGroupEarly && hasLowQuizScore) {
+				// Non-joiners with low scores: remove ALL events after day 14 from their first event (hard churn)
+				const churnCutoff = firstEventTime ? firstEventTime.add(14, 'days') : null;
+				for (let i = userEvents.length - 1; i >= 0; i--) {
+					const evt = userEvents[i];
+					if (churnCutoff && dayjs(evt.time).isAfter(churnCutoff)) {
+						userEvents.splice(i, 1);
+					}
+				}
+			} else if (joinedStudyGroupEarly) {
+				// Study group joiners keep all events and get bonus discussion_posted events
+				const lastEvent = userEvents[userEvents.length - 1];
+				if (lastEvent && chance.bool({ likelihood: 60 })) {
+					const bonusDiscussion = {
+						event: "discussion posted",
+						time: dayjs(lastEvent.time).add(chance.integer({ min: 1, max: 3 }), 'days').toISOString(),
+						user_id: lastEvent.user_id,
+						course_id: chance.pickone(courseIds),
+						post_type: chance.pickone(["question", "answer", "comment"]),
+						word_count: chance.integer({ min: 20, max: 400 }),
+						study_group_member: true,
+					};
+					userEvents.push(bonusDiscussion);
+				}
+			}
+			// Hook #2b: DEADLINE CRAMMING (quiz score penalty)
+			// Applied LAST to avoid selection bias — if applied before churn,
+			// the penalty pushes Sun/Mon quiz-takers below the hasLowQuizScore
+			// threshold, selectively churning them and inflating the avg.
+			userEvents.forEach((event) => {
+				if (event.event === "quiz completed" && event.time) {
+					const eventDay = dayjs(event.time).day();
+					if (eventDay === 0 || eventDay === 1) {
+						if (event.score_percent !== undefined) {
+							event.score_percent = Math.max(0, event.score_percent - 25);
+						}
+					}
+				}
+			});
 		}
 		// ═══════════════════════════════════════════════════════════════════
@@ -655,11 +752,11 @@ const config = {
 				const subscriptionStatus = meta.profile.subscription_status;
 				if (subscriptionStatus === "free") {
-					// Free users convert at 0.6x rate
-					record.conversionRate = (record.conversionRate || 0.25) * 0.6;
+					// Free users convert at 0.5x rate
+					record.conversionRate = (record.conversionRate || 0.25) * 0.5;
 				} else if (subscriptionStatus === "monthly" || subscriptionStatus === "annual") {
-					// Paid subscribers convert at 1.3x rate
-					record.conversionRate = (record.conversionRate || 0.25) * 1.3;
+					// Paid subscribers convert at 1.5x rate
+					record.conversionRate = (record.conversionRate || 0.25) * 1.5;
 				}
 			}
 		}
@@ -669,336 +766,3 @@ const config = {
 };
 export default config;
-/**
- * ═══════════════════════════════════════════════════════════════════════════════
- * NEEDLE IN A HAYSTACK - LEARNPATH ELEARNING ANALYTICS
- * ═══════════════════════════════════════════════════════════════════════════════
- *
- * An online learning platform dungeon with 8 deliberately architected analytics
- * insights hidden in the data. This dungeon simulates a real EdTech product
- * (like Coursera, Khan Academy, or Udemy) and demonstrates how meaningful
- * student behavior patterns can be discovered through product analytics.
- *
- * ═══════════════════════════════════════════════════════════════════════════════
- * DATASET OVERVIEW
- * ═══════════════════════════════════════════════════════════════════════════════
- *
- * - 5,000 users over 100 days
- * - 360K events across 17 event types
- * - 3 funnels (student onboarding, course completion, practice mastery)
- * - Group analytics (courses, study groups)
- * - Lookup tables (courses, quizzes)
- * - Subscription tiers (free, monthly, annual)
- *
- * ═══════════════════════════════════════════════════════════════════════════════
- * THE 8 ARCHITECTED HOOKS
- * ═══════════════════════════════════════════════════════════════════════════════
- *
- * Each hook creates a specific, discoverable analytics insight that simulates
- * real-world EdTech behavior patterns.
- *
- * ───────────────────────────────────────────────────────────────────────────────
- * 1. STUDENT VS INSTRUCTOR PROFILES
- * ───────────────────────────────────────────────────────────────────────────────
- *
- * PATTERN: User profiles are enriched based on account_type. Instructors receive
- * teaching attributes (courses_created, teaching_experience_years, instructor_rating).
- * Students receive learning attributes (learning_goal, study_hours_per_week).
- *
- * HOW TO FIND IT:
- *   - Segment users by: account_type = "instructor" vs "student"
- *   - Compare: presence of courses_created vs learning_goal properties
- *   - Filter profiles: instructor_rating exists (instructor-only property)
- *
- * EXPECTED INSIGHT: ~11% of users are instructors with teaching-specific metrics.
- * Instructors should show different event patterns (more feedback given, fewer
- * quizzes completed). Students show learning-goal-driven behavior differences.
- *
- * REAL-WORLD ANALOGUE: Two-sided marketplace profiling. Drivers vs riders in
- * Uber, sellers vs buyers in eBay - each persona has unique attributes and
- * behavioral patterns that require separate analysis.
- *
- * ───────────────────────────────────────────────────────────────────────────────
- * 2. DEADLINE CRAMMING
- * ───────────────────────────────────────────────────────────────────────────────
- *
- * PATTERN: Assignments submitted on Sundays and Mondays show deadline-rush
- * behavior: 60% are late (vs ~20% baseline) and quiz scores drop by 15 points.
- * These events carry is_deadline_rush: true.
- *
- * HOW TO FIND IT:
- *   - Chart: assignment_submitted by day of week
- *   - Compare: is_late rate by day of week
- *   - Compare: quiz_completed score_percent by day of week
- *   - Filter: is_deadline_rush = true
- *
- * EXPECTED INSIGHT: Clear quality drop on Sun/Mon. Late submission rate spikes
- * from ~20% to ~60%. Quiz scores taken on crunch days average 15 points lower.
- * This creates a visible "weekend dip" in student performance metrics.
- *
- * REAL-WORLD ANALOGUE: The "Sunday Scaries" of EdTech - students procrastinate
- * and cram before Monday deadlines. Identical to real patterns seen in Coursera
- * and university LMS data where submission quality drops near deadlines.
- *
- * ───────────────────────────────────────────────────────────────────────────────
- * 3. NOTES-TAKERS SUCCEED
- * ───────────────────────────────────────────────────────────────────────────────
- *
- * PATTERN: Students who take notes during 5 or more lecture_completed events
- * receive a +20 boost to all quiz scores (capped at 100), and have a 40% chance
- * of earning an extra certificate. Events are marked diligent_student: true.
- *
- * HOW TO FIND IT:
- *   - Create segment: users with 5+ lecture_completed where notes_taken = true
- *   - Compare: average quiz_completed score_percent
- *   - Compare: certificate_earned count per user
- *   - Filter: diligent_student = true
- *
- * EXPECTED INSIGHT: Diligent note-takers score ~20 points higher on quizzes
- * and earn certificates at a significantly higher rate. This is a classic
- * "active learning" signal visible in the data.
- *
- * REAL-WORLD ANALOGUE: Active engagement features (highlighting, bookmarking,
- * note-taking) that correlate with better learning outcomes. Real research
- * confirms note-taking improves retention by 30-40% - this hook models that.
- *
- * ───────────────────────────────────────────────────────────────────────────────
- * 4. STUDY GROUP RETENTION
- * ───────────────────────────────────────────────────────────────────────────────
- *
- * PATTERN: Students who join a study group within their first 10 days and have
- * passing quiz scores retain normally and receive bonus discussion events. Students
- * who do NOT join early AND have quiz scores below 60 experience severe churn:
- * 70% of their events after day 14 are removed.
- *
- * HOW TO FIND IT:
- *   - Create cohort: users who did "study group joined" within first 10 days
- *   - Compare: D14/D30 retention rate vs non-joiners
- *   - Compare: total events per user after day 14
- *   - Filter: study_group_member = true on bonus events
- *
- * EXPECTED INSIGHT: Early study group joiners show dramatically better retention
- * curves. Non-joiners with low quiz scores show a cliff-like drop in activity
- * after day 14. The combination of social isolation + poor performance predicts
- * churn with high accuracy.
- *
- * REAL-WORLD ANALOGUE: Social learning features that create accountability and
- * community. MOOCs with study groups or cohort-based programs consistently show
- * 3-5x higher completion rates than pure self-paced learning.
- *
- * ───────────────────────────────────────────────────────────────────────────────
- * 5. HINT DEPENDENCY
- * ───────────────────────────────────────────────────────────────────────────────
- *
- * PATTERN: In practice_problem_solved events, students who use hints have a 60%
- * chance of having their problem difficulty set to "easy". Students who solve
- * without hints have a 40% chance of tackling "hard" problems and receive
- * independent_solver: true.
- *
- * HOW TO FIND IT:
- *   - Segment practice_problem_solved by: hint_used = true vs false
- *   - Compare: difficulty distribution (easy vs medium vs hard)
- *   - Filter: independent_solver = true
- *   - Compare: average time_to_solve_sec by hint usage
- *
- * EXPECTED INSIGHT: Hint users cluster on easy problems; non-hint users tackle
- * harder problems. This creates a visible "hint dependency" where the scaffolding
- * intended to help students actually limits their growth trajectory.
- *
- * REAL-WORLD ANALOGUE: The "training wheels" problem in education technology.
- * Hints, auto-complete, and guided solutions can create dependency rather than
- * building genuine competence. Real platforms like LeetCode and HackerRank
- * observe this pattern.
- *
- * ───────────────────────────────────────────────────────────────────────────────
- * 6. SEMESTER-END SPIKE
- * ───────────────────────────────────────────────────────────────────────────────
- *
- * PATTERN: During days 75-85 of the dataset, quiz_started, quiz_completed, and
- * assignment_submitted events have a 50% chance of being duplicated (with slightly
- * offset timestamps). All events in this window carry semester_end_rush: true.
- *
- * HOW TO FIND IT:
- *   - Chart: quiz_started, quiz_completed, assignment_submitted counts by day
- *   - Look for: clear volume spike during days 75-85
- *   - Filter: semester_end_rush = true
- *   - Compare: event volume in days 75-85 vs days 60-75 (baseline)
- *
- * EXPECTED INSIGHT: Assessment activity roughly doubles during the "finals"
- * period. This creates a visible spike in the time series that mirrors real
- * academic calendar patterns.
- *
- * REAL-WORLD ANALOGUE: End-of-semester, end-of-quarter, or end-of-trial
- * behavior spikes. Every EdTech platform sees massive activity surges before
- * deadlines, certification exams, or subscription renewal dates.
- *
- * ───────────────────────────────────────────────────────────────────────────────
- * 7. FREE VS PAID COURSES
- * ───────────────────────────────────────────────────────────────────────────────
- *
- * PATTERN: The Course Completion funnel conversion rate is modified by the user's
- * subscription_status. Free users convert at 0.6x the base rate; monthly and
- * annual subscribers convert at 1.3x. This creates a ~2.2x difference between
- * free and paid users in course completion.
- *
- * HOW TO FIND IT:
- *   - Segment the Course Completion funnel by: subscription_status
- *   - Compare: funnel conversion rates for free vs monthly vs annual
- *   - Compare: certificate_earned counts by subscription_status
- *
- * EXPECTED INSIGHT: Paid subscribers are roughly 2x more likely to complete
- * courses end-to-end. Free users drop off heavily between quiz_completed and
- * certificate_earned. This mirrors the "skin in the game" effect.
- *
- * REAL-WORLD ANALOGUE: The well-documented correlation between payment and
- * completion in online education. Paid Coursera learners complete courses at
- * 5-10x the rate of free audit-track learners. Financial commitment creates
- * psychological commitment.
- *
- * ───────────────────────────────────────────────────────────────────────────────
- * 8. PLAYBACK SPEED CORRELATION
- * ───────────────────────────────────────────────────────────────────────────────
- *
- * PATTERN: In lecture_completed events, playback speed creates two distinct
- * learner segments:
- *   - Speed learners (>= 1.5x): get speed_learner: true, compressed watch_time
- *     (0.6x), and paradoxically HIGHER quiz scores (+8 points)
- *   - Thorough learners (<= 1.0x): get thorough_learner: true, extended watch_time
- *     (1.4x)
- *
- * HOW TO FIND IT:
- *   - Segment lecture_completed by: playback_speed
- *   - Compare: average watch_time_mins by speed bucket
- *   - Compare: subsequent quiz_completed score_percent
- *   - Filter: speed_learner = true or thorough_learner = true
- *   - Correlate: playback_speed with quiz performance
- *
- * EXPECTED INSIGHT: Counter-intuitively, speed learners score slightly higher
- * on quizzes despite watching lectures faster. This suggests that playback speed
- * is a proxy for prior knowledge or aptitude, not laziness.
- *
- * REAL-WORLD ANALOGUE: Research on lecture playback speed consistently shows
- * that students who watch at 1.5-2x speed perform equally or better on assessments.
- * Speed selection correlates with confidence and familiarity with the material,
- * not with learning quality.
- *
- * ═══════════════════════════════════════════════════════════════════════════════
- * ADVANCED ANALYSIS IDEAS
- * ═══════════════════════════════════════════════════════════════════════════════
- *
- * CROSS-HOOK PATTERNS:
- *
- * 1. The Ideal Student: Users who:
- *    - Take notes consistently (Hook #3)
- *    - Join study groups early (Hook #4)
- *    - Solve problems without hints (Hook #5)
- *    - Have paid subscriptions (Hook #7)
- *    - Watch lectures at higher speed (Hook #8)
- *    These students should have exceptional completion rates and quiz scores.
- *
- * 2. The Cramming Cascade: Do deadline crammers (Hook #2) also show up
- *    in the semester-end spike (Hook #6)? Is the quality drop compounded?
- *
- * 3. Social Safety Net: Does early study group joining (Hook #4) prevent
- *    churn even for students who struggle on quizzes?
- *
- * 4. Hint-to-Mastery Pipeline: Do hint-dependent students (Hook #5) who
- *    later join study groups (Hook #4) eventually wean off hints?
- *
- * 5. Payment + Notes: Are paid subscribers (Hook #7) more likely to take
- *    notes (Hook #3)? Does the combination create a super-performer segment?
- *
- * COHORT ANALYSIS:
- *
- * - Cohort by education level: Do PhD students vs self-taught learners
- *   show different hook patterns?
- * - Cohort by learning style: Do visual vs hands-on learners take more notes?
- * - Cohort by platform: Do mobile (iOS/Android) users have different playback
- *   speed preferences than Web/iPad users?
- * - Cohort by course category: Do CS students use hints more than Arts students?
- *
- * FUNNEL ANALYSIS:
- *
- * - Onboarding Funnel: How does account_type affect the register -> enroll ->
- *   first lecture conversion?
- * - Course Completion Funnel: Compare by subscription_status, note-taking
- *   behavior, and study group membership
- * - Practice Mastery Funnel: Compare by hint usage, playback speed, and
- *   learning style
- *
- * ═══════════════════════════════════════════════════════════════════════════════
- * EXPECTED METRICS SUMMARY
- * ═══════════════════════════════════════════════════════════════════════════════
- *
- * Hook                    | Metric                | Baseline | Hook Effect  | Ratio
- * ────────────────────────|───────────────────────|──────────|──────────────|──────
- * Student vs Instructor   | Profile attributes    | generic  | role-specific| N/A
- * Deadline Cramming       | Late submission rate  | ~20%     | ~60%         | 3x
- * Deadline Cramming       | Quiz score (Sun/Mon)  | ~65      | ~50          | -15pt
- * Notes-Takers Succeed    | Quiz score            | ~65      | ~85          | +20pt
- * Notes-Takers Succeed    | Certificate rate      | baseline | +40%         | 1.4x
- * Study Group Retention   | D14 retention         | ~40%     | ~90%         | 2.3x
- * Study Group Retention   | Post-D14 events       | 100%     | 30% (churn)  | 0.3x
- * Hint Dependency         | Easy problem rate     | ~33%     | ~60%         | 1.8x
- * Hint Dependency         | Hard problem rate     | ~33%     | ~40% (no hint)| 1.2x
- * Semester-End Spike      | Assessment volume     | baseline | ~2x          | 2x
- * Free vs Paid            | Course completion     | 15%      | 33%          | 2.2x
- * Playback Speed          | Quiz score (speed)    | ~65      | ~73          | +8pt
- *
- * ═══════════════════════════════════════════════════════════════════════════════
- * HOW TO RUN THIS DUNGEON
- * ═══════════════════════════════════════════════════════════════════════════════
- *
- * From the dm4 root directory:
- *
- *   npm start
- *
- * Or programmatically:
- *
- *   import generate from './index.js';
- *   import config from './dungeons/harness-education.js';
- *   const results = await generate(config);
- *
- * OUTPUT FILES (with writeToDisk: false, format: "json", gzip: true):
- *
- *   - needle-haystack-education__events.json.gz - All event data
- *   - needle-haystack-education__user_profiles.json.gz - User profiles
- *   - needle-haystack-education__group_profiles.json.gz - Course & study group profiles
- *   - needle-haystack-education__course_id_lookup.json.gz - Course catalog
- *   - needle-haystack-education__quiz_id_lookup.json.gz - Quiz catalog
- *
- * ═══════════════════════════════════════════════════════════════════════════════
- * TESTING YOUR ANALYTICS PLATFORM
- * ═══════════════════════════════════════════════════════════════════════════════
- *
- * This dungeon is perfect for testing:
- *
- * 1. Segmentation: Can you separate instructor vs student behavior patterns?
- * 2. Temporal Analysis: Can you detect the deadline cramming and semester-end spike?
- * 3. Behavioral Correlation: Can you discover the note-taking success pattern?
- * 4. Retention Analysis: Can you identify the study group retention effect?
- * 5. Feature Impact: Can you measure hint dependency on problem difficulty?
- * 6. Anomaly Detection: Can you automatically detect the semester-end volume spike?
- * 7. Funnel Analysis: Can you quantify the free vs paid completion gap?
- * 8. Counter-intuitive Insight: Can you find the speed learner paradox?
- *
- * ═══════════════════════════════════════════════════════════════════════════════
- * WHY "NEEDLE IN A HAYSTACK"?
- * ═══════════════════════════════════════════════════════════════════════════════
- *
- * Each hook is a "needle" - a meaningful, actionable insight hidden in a
- * "haystack" of 360K events. The challenge is:
- *
- * 1. FINDING the needles (discovery)
- * 2. VALIDATING they are real patterns (statistical significance)
- * 3. UNDERSTANDING why they matter (educational impact)
- * 4. ACTING on them (platform improvements)
- *
- * This mirrors real-world EdTech analytics: your data contains valuable insights
- * about student success, but you need the right tools and skills to find them.
- *
- * Happy Learning!
- *
- * ═══════════════════════════════════════════════════════════════════════════════
- */