make-mp-data 2.1.11 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +31 -0
  2. package/dungeons/adspend.js +2 -2
  3. package/dungeons/ai-chat-analytics-ed.js +3 -2
  4. package/dungeons/anon.js +2 -2
  5. package/dungeons/array-of-object-loopup.js +181 -0
  6. package/dungeons/benchmark-heavy.js +241 -0
  7. package/dungeons/benchmark-light.js +141 -0
  8. package/dungeons/big.js +9 -8
  9. package/dungeons/business.js +2 -1
  10. package/dungeons/clinch-agi.js +632 -0
  11. package/dungeons/complex.js +3 -2
  12. package/dungeons/copilot.js +383 -0
  13. package/dungeons/ecommerce-store.js +0 -0
  14. package/dungeons/experiments.js +5 -4
  15. package/dungeons/foobar.js +1 -1
  16. package/dungeons/funnels.js +2 -2
  17. package/dungeons/gaming.js +3 -2
  18. package/dungeons/harness/harness-education.js +988 -0
  19. package/dungeons/harness/harness-fintech.js +976 -0
  20. package/dungeons/harness/harness-food.js +985 -0
  21. package/dungeons/harness/harness-gaming.js +1178 -0
  22. package/dungeons/harness/harness-media.js +961 -0
  23. package/dungeons/harness/harness-sass.js +923 -0
  24. package/dungeons/harness/harness-social.js +928 -0
  25. package/dungeons/kurby.js +211 -0
  26. package/dungeons/media.js +5 -4
  27. package/dungeons/mil.js +4 -3
  28. package/dungeons/mirror.js +2 -2
  29. package/dungeons/money2020-ed.js +8 -7
  30. package/dungeons/sanity.js +3 -2
  31. package/dungeons/scd.js +3 -2
  32. package/dungeons/simple.js +30 -15
  33. package/dungeons/strict-event-test.js +30 -0
  34. package/dungeons/student-teacher.js +3 -2
  35. package/dungeons/text-generation.js +84 -85
  36. package/dungeons/too-big-events.js +166 -0
  37. package/dungeons/uday-schema.json +220 -0
  38. package/dungeons/userAgent.js +4 -3
  39. package/index.js +41 -54
  40. package/lib/core/config-validator.js +122 -7
  41. package/lib/core/context.js +7 -14
  42. package/lib/core/storage.js +57 -25
  43. package/lib/generators/adspend.js +12 -12
  44. package/lib/generators/events.js +6 -5
  45. package/lib/generators/funnels.js +32 -10
  46. package/lib/generators/product-lookup.js +262 -0
  47. package/lib/generators/product-names.js +195 -0
  48. package/lib/generators/profiles.js +3 -3
  49. package/lib/generators/scd.js +13 -3
  50. package/lib/generators/text.js +17 -4
  51. package/lib/orchestrators/mixpanel-sender.js +244 -204
  52. package/lib/orchestrators/user-loop.js +54 -16
  53. package/lib/templates/funnels-instructions.txt +272 -0
  54. package/lib/templates/hook-examples.json +187 -0
  55. package/lib/templates/hooks-instructions.txt +295 -8
  56. package/lib/templates/phrases.js +473 -16
  57. package/lib/templates/refine-instructions.txt +485 -0
  58. package/lib/templates/schema-instructions.txt +239 -109
  59. package/lib/templates/schema.d.ts +173 -0
  60. package/lib/templates/verbose-schema.js +140 -206
  61. package/lib/utils/ai.js +853 -77
  62. package/lib/utils/chart.js +210 -0
  63. package/lib/utils/function-registry.js +285 -0
  64. package/lib/utils/json-evaluator.js +172 -0
  65. package/lib/utils/logger.js +38 -0
  66. package/lib/utils/mixpanel.js +101 -0
  67. package/lib/utils/project.js +3 -2
  68. package/lib/utils/utils.js +41 -4
  69. package/package.json +15 -21
  70. package/types.d.ts +15 -5
  71. package/lib/generators/text-bak-old.js +0 -1121
  72. package/lib/orchestrators/worker-manager.js +0 -203
  73. package/lib/templates/phrases-bak.js +0 -925
  74. package/lib/templates/prompt (old).txt +0 -98
  75. package/lib/templates/scratch-dungeon-template.js +0 -116
  76. package/lib/templates/textQuickTest.js +0 -172
@@ -0,0 +1,988 @@
1
+ import dayjs from "dayjs";
2
+ import utc from "dayjs/plugin/utc.js";
3
+ import "dotenv/config";
4
+ import * as u from "../../lib/utils/utils.js";
5
+ import * as v from "ak-tools";
6
+
7
+ const SEED = "harness-education";
8
+ dayjs.extend(utc);
9
+ const chance = u.initChance(SEED);
10
+ const num_users = 5_000;
11
+ const days = 100;
12
+
13
+ /** @typedef {import("../../types.js").Dungeon} Config */
14
+
15
+ /**
16
+ * NEEDLE IN A HAYSTACK - ELEARNING APP DESIGN
17
+ *
18
+ * LearnPath - An online learning platform modeled after Coursera, Khan Academy, and Udemy.
19
+ * The platform supports both self-paced and cohort-based learning, with a robust ecosystem
20
+ * of courses, quizzes, assignments, and social study features.
21
+ *
22
+ * CORE LEARNING LOOP:
23
+ * Students register accounts, browse and enroll in courses across multiple categories
24
+ * (CS, Math, Science, Business, Arts, Languages), then progress through structured
25
+ * modules consisting of lectures, practice problems, quizzes, and assignments. Successful
26
+ * completion of all requirements earns a certificate. The platform emphasizes active
27
+ * learning through note-taking, practice problems, and peer discussion.
28
+ *
29
+ * COURSE SYSTEM (events: course enrolled -> lecture started -> lecture completed):
30
+ * - Six course categories spanning technical and creative disciplines
31
+ * - Three difficulty tiers: beginner, intermediate, advanced
32
+ * - Free and paid course options (drives subscription analytics)
33
+ * - 150 unique courses with varying lengths, ratings, and enrollment counts
34
+ * - Modules (1-12 per course) contain lectures, quizzes, and assignments
35
+ *
36
+ * LECTURE EXPERIENCE (events: lecture started -> lecture completed):
37
+ * - Variable lecture durations (5-60 minutes) reflecting real MOOC patterns
38
+ * - Playback speed options (0.75x to 2.0x) reveal learning style differences
39
+ * - Note-taking tracking creates a behavioral signal for student diligence
40
+ * - Watch time vs. lecture duration measures actual engagement
41
+ *
42
+ * ASSESSMENT SYSTEM (events: quiz started -> quiz completed, assignment submitted -> graded):
43
+ * - Practice quizzes (low stakes, unlimited attempts) vs. graded quizzes vs. final exams
44
+ * - Assignments support text, code, file upload, and project submissions
45
+ * - Grading by instructors, peers, or auto-grader (reflects real platform patterns)
46
+ * - Score tracking enables learning outcome analytics
47
+ *
48
+ * PRACTICE PROBLEMS (event: practice problem solved):
49
+ * - Difficulty-tiered problems (easy, medium, hard) for skill building
50
+ * - Hint system creates a measurable dependency pattern
51
+ * - Time-to-solve metrics reveal mastery progression
52
+ * - High volume (weight: 12) reflects real platform usage patterns
53
+ *
54
+ * SOCIAL LEARNING (events: discussion posted, study group joined):
55
+ * - Discussion forums with questions, answers, and comments
56
+ * - Study groups (study circles, project teams, tutoring groups)
57
+ * - Social features drive retention (a key hook pattern)
58
+ *
59
+ * INSTRUCTOR ECOSYSTEM (events: instructor feedback given, assignment graded):
60
+ * - Instructors create courses, grade assignments, and provide feedback
61
+ * - Written, video, and rubric-based feedback types
62
+ * - Response time tracking (1-72 hours) measures instructor engagement
63
+ *
64
+ * MONETIZATION (event: subscription purchased):
65
+ * - Three tiers: monthly ($19.99), annual ($149.99), lifetime ($499.99)
66
+ * - Free tier with limited access (most users)
67
+ * - Subscription status affects course completion funnels (Hook #7)
68
+ *
69
+ * SUPPORT SYSTEM (event: help requested):
70
+ * - Four topic categories: technical, content, billing, accessibility
71
+ * - Three channels: chat, email, forum
72
+ * - Tracks student friction points
73
+ *
74
+ * COURSE REVIEWS (event: course reviewed):
75
+ * - 1-5 star ratings with written reviews
76
+ * - Would-recommend boolean for NPS-style analysis
77
+ * - Review length correlates with sentiment strength
78
+ *
79
+ * WHY THESE EVENTS/PROPERTIES?
80
+ * - Events model the complete student lifecycle: onboarding -> engagement -> mastery -> certification
81
+ * - Properties enable cohort analysis: learning style, education level, account type, subscription status
82
+ * - Funnels reveal friction: where do students drop off in onboarding, course completion, practice mastery?
83
+ * - Behavioral signals (notes, hints, playback speed, study groups) create discoverable skill gaps
84
+ * - Social features (study groups, discussions) and monetization (subscriptions) drive business metrics
85
+ * - The "needle in haystack" hooks simulate real EdTech insights hidden in production data
86
+ */
87
+
88
+ // Generate shared ID pools once at module load so the same IDs recur across event properties (lookupTables below is empty)
89
+ const courseIds = v.range(1, 151).map(n => `course_${v.uid(6)}`);
90
+ const quizIds = v.range(1, 401).map(n => `quiz_${v.uid(6)}`);
91
+ const groupIds = v.range(1, 301).map(n => `group_${v.uid(6)}`);
92
+ const lectureIds = v.range(1, 501).map(n => `lecture_${v.uid(6)}`);
93
+ const assignmentIds = v.range(1, 201).map(n => `assignment_${v.uid(6)}`);
94
+ const problemIds = v.range(1, 601).map(n => `problem_${v.uid(6)}`);
95
+
96
+ /** @type {Config} */
97
+ const config = {
98
+ token: "10a2fd5d566edd19e803036b276fe91b",
99
+ seed: SEED,
100
+ numDays: days,
101
+ numEvents: num_users * 120,
102
+ numUsers: num_users,
103
+ hasAnonIds: false,
104
+ hasSessionIds: true,
105
+ format: "json",
106
+ gzip: true,
107
+ alsoInferFunnels: false,
108
+ hasLocation: true,
109
+ hasAndroidDevices: true,
110
+ hasIOSDevices: true,
111
+ hasDesktopDevices: true,
112
+ hasBrowser: false,
113
+ hasCampaigns: false,
114
+ isAnonymous: false,
115
+ hasAdSpend: false,
116
+ percentUsersBornInDataset: 50,
117
+ hasAvatar: true,
118
+ makeChart: false,
119
+ batchSize: 2_500_000,
120
+ concurrency: 10,
121
+ writeToDisk: false,
122
+
123
+ funnels: [
124
+ {
125
+ sequence: ["account registered", "course enrolled", "lecture started"],
126
+ isFirstFunnel: true,
127
+ conversionRate: 75,
128
+ timeToConvert: 1,
129
+ },
130
+ {
131
+ // Core learning loop: students watch lectures and do practice problems constantly
132
+ sequence: ["lecture started", "lecture completed", "practice problem solved"],
133
+ conversionRate: 70,
134
+ timeToConvert: 4,
135
+ weight: 5,
136
+ },
137
+ {
138
+ // Assessment flow: quizzes and assignments after studying
139
+ sequence: ["quiz started", "quiz completed", "assignment submitted"],
140
+ conversionRate: 55,
141
+ timeToConvert: 8,
142
+ weight: 3,
143
+ },
144
+ {
145
+ // Course completion journey: enroll → complete → earn certificate
146
+ sequence: ["course enrolled", "lecture completed", "quiz completed", "certificate earned"],
147
+ conversionRate: 30,
148
+ timeToConvert: 48,
149
+ weight: 2,
150
+ },
151
+ {
152
+ // Social learning: discussions and study groups
153
+ sequence: ["discussion posted", "study group joined", "resource downloaded"],
154
+ conversionRate: 50,
155
+ timeToConvert: 12,
156
+ weight: 2,
157
+ },
158
+ {
159
+ // Instructor interaction loop
160
+ sequence: ["assignment submitted", "assignment graded", "instructor feedback given"],
161
+ conversionRate: 45,
162
+ timeToConvert: 24,
163
+ weight: 2,
164
+ },
165
+ {
166
+ // Support and monetization
167
+ sequence: ["help requested", "subscription purchased", "course reviewed"],
168
+ conversionRate: 35,
169
+ timeToConvert: 24,
170
+ weight: 1,
171
+ },
172
+ ],
173
+
174
+ events: [
175
+ {
176
+ event: "account registered",
177
+ weight: 1,
178
+ isFirstEvent: true,
179
+ properties: {
180
+ "account_type": u.pickAWinner(["student", "instructor"], 0.15),
181
+ "signup_source": u.pickAWinner(["organic", "referral", "school_partnership", "social_ad"]),
182
+ }
183
+ },
184
+ {
185
+ event: "course enrolled",
186
+ weight: 8,
187
+ properties: {
188
+ "course_id": u.pickAWinner(courseIds),
189
+ "course_category": u.pickAWinner(["CS", "Math", "Science", "Business", "Arts", "Languages"]),
190
+ "difficulty": u.pickAWinner(["beginner", "intermediate", "advanced"]),
191
+ "is_free": u.pickAWinner([true, false], 0.4),
192
+ }
193
+ },
194
+ {
195
+ event: "lecture started",
196
+ weight: 18,
197
+ properties: {
198
+ "course_id": u.pickAWinner(courseIds),
199
+ "lecture_id": u.pickAWinner(lectureIds),
200
+ "lecture_duration_mins": u.weighNumRange(5, 60, 0.8, 20),
201
+ "module_number": u.weighNumRange(1, 12),
202
+ }
203
+ },
204
+ {
205
+ event: "lecture completed",
206
+ weight: 14,
207
+ properties: {
208
+ "course_id": u.pickAWinner(courseIds),
209
+ "lecture_id": u.pickAWinner(lectureIds),
210
+ "watch_time_mins": u.weighNumRange(3, 60, 0.8, 20),
211
+ "playback_speed": u.pickAWinner([0.75, 1.0, 1.0, 1.0, 1.25, 1.5, 2.0]),
212
+ "notes_taken": u.pickAWinner([true, false], 0.35),
213
+ }
214
+ },
215
+ {
216
+ event: "quiz started",
217
+ weight: 10,
218
+ properties: {
219
+ "course_id": u.pickAWinner(courseIds),
220
+ "quiz_id": u.pickAWinner(quizIds),
221
+ "quiz_type": u.pickAWinner(["practice", "graded", "final_exam"]),
222
+ "question_count": u.weighNumRange(5, 50, 0.7, 15),
223
+ }
224
+ },
225
+ {
226
+ event: "quiz completed",
227
+ weight: 8,
228
+ properties: {
229
+ "course_id": u.pickAWinner(courseIds),
230
+ "quiz_id": u.pickAWinner(quizIds),
231
+ "score_percent": u.weighNumRange(0, 100, 1.2, 50),
232
+ "time_spent_mins": u.weighNumRange(3, 120, 0.6, 25),
233
+ "attempts": u.weighNumRange(1, 5, 0.5, 3),
234
+ }
235
+ },
236
+ {
237
+ event: "assignment submitted",
238
+ weight: 6,
239
+ properties: {
240
+ "course_id": u.pickAWinner(courseIds),
241
+ "assignment_id": u.pickAWinner(assignmentIds),
242
+ "submission_type": u.pickAWinner(["text", "code", "file", "project"]),
243
+ "word_count": u.weighNumRange(100, 5000, 0.6, 500),
244
+ "is_late": u.pickAWinner([true, false], 0.2),
245
+ }
246
+ },
247
+ {
248
+ event: "assignment graded",
249
+ weight: 5,
250
+ properties: {
251
+ "course_id": u.pickAWinner(courseIds),
252
+ "assignment_id": u.pickAWinner(assignmentIds),
253
+ "grade": u.pickAWinner(["A", "B", "C", "D", "F"]),
254
+ "feedback_length": u.weighNumRange(0, 500, 0.5, 100),
255
+ "grader": u.pickAWinner(["instructor", "peer", "auto"]),
256
+ }
257
+ },
258
+ {
259
+ event: "discussion posted",
260
+ weight: 7,
261
+ properties: {
262
+ "course_id": u.pickAWinner(courseIds),
263
+ "post_type": u.pickAWinner(["question", "answer", "comment"]),
264
+ "word_count": u.weighNumRange(10, 500, 0.6, 80),
265
+ }
266
+ },
267
+ {
268
+ event: "certificate earned",
269
+ weight: 2,
270
+ properties: {
271
+ "course_id": u.pickAWinner(courseIds),
272
+ "completion_time_days": u.weighNumRange(7, 180, 0.5, 45),
273
+ "final_grade": u.weighNumRange(60, 100, 1.2, 30),
274
+ }
275
+ },
276
+ {
277
+ event: "study group joined",
278
+ weight: 4,
279
+ properties: {
280
+ "group_id": u.pickAWinner(groupIds),
281
+ "group_size": u.weighNumRange(3, 20, 0.7, 8),
282
+ "group_type": u.pickAWinner(["study_circle", "project_team", "tutoring"]),
283
+ }
284
+ },
285
+ {
286
+ event: "resource downloaded",
287
+ weight: 9,
288
+ properties: {
289
+ "resource_type": u.pickAWinner(["pdf", "slides", "code_sample", "dataset", "cheat_sheet"]),
290
+ "course_id": u.pickAWinner(courseIds),
291
+ }
292
+ },
293
+ {
294
+ event: "instructor feedback given",
295
+ weight: 3,
296
+ properties: {
297
+ "course_id": u.pickAWinner(courseIds),
298
+ "feedback_type": u.pickAWinner(["written", "video", "rubric"]),
299
+ "response_time_hours": u.weighNumRange(1, 72, 0.5, 15),
300
+ }
301
+ },
302
+ {
303
+ event: "course reviewed",
304
+ weight: 3,
305
+ properties: {
306
+ "course_id": u.pickAWinner(courseIds),
307
+ "rating": u.weighNumRange(1, 5, 1.5, 3),
308
+ "review_length": u.weighNumRange(10, 1000, 0.5, 100),
309
+ "would_recommend": u.pickAWinner([true, false], 0.7),
310
+ }
311
+ },
312
+ {
313
+ event: "subscription purchased",
314
+ weight: 2,
315
+ properties: {
316
+ "plan": u.pickAWinner(["monthly", "annual", "lifetime"]),
317
+ "price": u.pickAWinner([19.99, 149.99, 499.99]),
318
+ }
319
+ },
320
+ {
321
+ event: "help requested",
322
+ weight: 4,
323
+ properties: {
324
+ "topic": u.pickAWinner(["technical", "content", "billing", "accessibility"]),
325
+ "channel": u.pickAWinner(["chat", "email", "forum"]),
326
+ }
327
+ },
328
+ {
329
+ event: "practice problem solved",
330
+ weight: 12,
331
+ properties: {
332
+ "course_id": u.pickAWinner(courseIds),
333
+ "problem_id": u.pickAWinner(problemIds),
334
+ "difficulty": u.pickAWinner(["easy", "medium", "hard"]),
335
+ "time_to_solve_sec": u.weighNumRange(10, 3600, 0.5, 300),
336
+ "hint_used": u.pickAWinner([true, false], 0.35),
337
+ }
338
+ },
339
+ ],
340
+
341
+ superProps: {
342
+ platform: u.pickAWinner(["Web", "iOS", "Android", "iPad"]),
343
+ subscription_status: u.pickAWinner(["free", "free", "free", "monthly", "annual"]),
344
+ },
345
+
346
+ scdProps: {},
347
+
348
+ userProps: {
349
+ "account_type": u.pickAWinner(["student", "student", "student", "student", "student", "student", "student", "student", "instructor"]),
350
+ "learning_style": u.pickAWinner(["visual", "reading", "hands_on", "auditory"]),
351
+ "education_level": u.pickAWinner(["high_school", "bachelors", "masters", "phd", "self_taught"]),
352
+ "timezone": u.pickAWinner(["US_Eastern", "US_Pacific", "US_Central", "Europe", "Asia"]),
353
+ },
354
+
355
+ groupKeys: [
356
+ ["course_id", 150, ["course enrolled", "lecture started", "lecture completed", "quiz completed", "certificate earned"]],
357
+ ["group_id", 300, ["study group joined", "discussion posted"]],
358
+ ],
359
+
360
+ groupProps: {
361
+ course_id: {
362
+ "title": () => `${chance.pickone(["Introduction to", "Advanced", "Mastering", "Fundamentals of", "Applied"])} ${chance.pickone(["Algorithms", "Data Science", "Machine Learning", "Statistics", "Web Development", "Calculus", "Biology", "Economics", "Design Thinking", "Creative Writing"])}`,
363
+ "instructor_count": u.weighNumRange(1, 5, 0.5, 2),
364
+ "total_enrolled": u.weighNumRange(50, 5000, 0.6, 500),
365
+ "avg_rating": u.weighNumRange(3, 5, 1.5, 1),
366
+ },
367
+ group_id: {
368
+ "name": () => `${chance.pickone(["Study", "Learning", "Focus", "Peer", "Cohort"])} ${chance.pickone(["Circle", "Squad", "Team", "Hub", "Group"])} ${chance.character({ alpha: true, casing: "upper" })}${chance.integer({ min: 1, max: 99 })}`,
369
+ "member_count": u.weighNumRange(3, 20, 0.7, 8),
370
+ "focus_area": u.pickAWinner(["CS", "Math", "Science", "Business", "Arts", "Languages"]),
371
+ }
372
+ },
373
+
374
+ lookupTables: [],
375
+
376
+ /**
377
+ * ARCHITECTED ANALYTICS HOOKS
378
+ *
379
+ * This hook function creates 8 deliberate patterns in the data:
380
+ *
381
+ * 1. STUDENT VS INSTRUCTOR PROFILES: Instructor profiles get teaching attributes; students get learning attributes
382
+ * 2. DEADLINE CRAMMING: Assignments submitted on Sun/Mon are rushed and lower quality
383
+ * 3. NOTES-TAKERS SUCCEED: Students who take notes during lectures score higher on quizzes
384
+ * 4. STUDY GROUP RETENTION: Early study group joiners retain; non-joiners with low scores churn
385
+ * 5. HINT DEPENDENCY: Hint users get locked into easy problems; non-hint users tackle harder ones
386
+ * 6. SEMESTER-END SPIKE: Days 75-85 see ~1.5x assessment activity (each assessment event has a 50% chance of being duplicated during the cramming period)
387
+ * 7. FREE VS PAID COURSES: Paid subscribers (1.3x multiplier) convert through funnels at ~2.2x the rate of free users (0.6x multiplier)
388
+ * 8. PLAYBACK SPEED CORRELATION: Speed learners paradoxically score higher; thorough learners get extended time
389
+ */
390
+ hook: function (record, type, meta) {
391
+ const NOW = dayjs();
392
+ const DATASET_START = NOW.subtract(days, 'days');
393
+
394
+ // ═══════════════════════════════════════════════════════════════════
395
+ // Hook #1: STUDENT VS INSTRUCTOR PROFILES
396
+ // ═══════════════════════════════════════════════════════════════════
397
+ if (type === "user") {
398
+ if (record.account_type === "instructor") {
399
+ record.courses_created = chance.integer({ min: 1, max: 15 });
400
+ record.teaching_experience_years = chance.integer({ min: 1, max: 20 });
401
+ record.instructor_rating = Math.round((chance.floating({ min: 3.0, max: 5.0 }) + Number.EPSILON) * 100) / 100;
402
+ } else {
403
+ record.learning_goal = chance.pickone(["career_change", "skill_upgrade", "hobby", "degree_requirement"]);
404
+ record.study_hours_per_week = chance.integer({ min: 2, max: 30 });
405
+ }
406
+ }
407
+
408
+ // ═══════════════════════════════════════════════════════════════════
409
+ // Hook #2: DEADLINE CRAMMING
410
+ // ═══════════════════════════════════════════════════════════════════
411
+ if (type === "event") {
412
+ if (record.event === "assignment submitted" && record.time) {
413
+ const eventDay = dayjs(record.time).day(); // 0 = Sunday, 1 = Monday
414
+ if (eventDay === 0 || eventDay === 1) {
415
+ record.is_deadline_rush = true;
416
+ // 60% chance of being late (only 40% on time)
417
+ record.is_late = !chance.bool({ likelihood: 40 });
418
+ } else {
419
+ record.is_deadline_rush = false;
420
+ record.is_late = !chance.bool({ likelihood: 80 });
421
+ }
422
+ }
423
+
424
+ if (record.event === "quiz completed" && record.time) {
425
+ const eventDay = dayjs(record.time).day();
426
+ if (eventDay === 0 || eventDay === 1) {
427
+ if (record.score_percent !== undefined) {
428
+ record.score_percent = Math.max(0, record.score_percent - 15);
429
+ }
430
+ }
431
+ }
432
+ }
433
+
434
+ // ═══════════════════════════════════════════════════════════════════
435
+ // Hook #5: HINT DEPENDENCY
436
+ // ═══════════════════════════════════════════════════════════════════
437
+ if (type === "event") {
438
+ if (record.event === "practice problem solved") {
439
+ if (record.hint_used === true) {
440
+ // Hint users gravitate toward easy problems
441
+ if (chance.bool({ likelihood: 60 })) {
442
+ record.difficulty = "easy";
443
+ }
444
+ record.independent_solver = false;
445
+ } else if (record.hint_used === false) {
446
+ // Independent solvers tackle harder problems
447
+ if (chance.bool({ likelihood: 40 })) {
448
+ record.difficulty = "hard";
449
+ record.independent_solver = true;
450
+ } else {
451
+ record.independent_solver = false;
452
+ }
453
+ } else {
454
+ record.independent_solver = false;
455
+ }
456
+ }
457
+ }
458
+
459
+ // ═══════════════════════════════════════════════════════════════════
460
+ // Hook #6: SEMESTER-END SPIKE
461
+ // ═══════════════════════════════════════════════════════════════════
462
+ if (type === "event") {
463
+ if (record.time) {
464
+ const eventTime = dayjs(record.time);
465
+ const dayInDataset = eventTime.diff(DATASET_START, 'days', true);
466
+
467
+ const spikableEvents = ["quiz started", "quiz completed", "assignment submitted"];
468
+ if (spikableEvents.includes(record.event)) {
469
+ if (dayInDataset >= 75 && dayInDataset <= 85) {
470
+ record.semester_end_rush = true;
471
+
472
+ // 50% chance to duplicate
473
+ if (chance.bool({ likelihood: 50 })) {
474
+ const duplicate = JSON.parse(JSON.stringify(record));
475
+ duplicate.time = eventTime.add(chance.integer({ min: 5, max: 120 }), 'minutes').toISOString();
476
+ duplicate.semester_end_rush = true;
477
+ return [record, duplicate];
478
+ }
479
+ } else {
480
+ record.semester_end_rush = false;
481
+ }
482
+ }
483
+ }
484
+ }
485
+
486
+ // ═══════════════════════════════════════════════════════════════════
487
+ // Hook #8: PLAYBACK SPEED CORRELATION
488
+ // ═══════════════════════════════════════════════════════════════════
489
+ if (type === "event") {
490
+ if (record.event === "lecture completed") {
491
+ const speed = record.playback_speed;
492
+
493
+ if (speed >= 1.5) {
494
+ record.speed_learner = true;
495
+ record.thorough_learner = false;
496
+ // Compress watch time for speed learners
497
+ if (record.watch_time_mins !== undefined) {
498
+ record.watch_time_mins = Math.max(3, Math.floor(record.watch_time_mins * 0.6));
499
+ }
500
+ } else if (speed !== undefined && speed <= 1.0) {
501
+ record.speed_learner = false;
502
+ record.thorough_learner = true;
503
+ // Extend watch time for thorough learners
504
+ if (record.watch_time_mins !== undefined) {
505
+ record.watch_time_mins = Math.min(90, Math.floor(record.watch_time_mins * 1.4));
506
+ }
507
+ } else {
508
+ record.speed_learner = false;
509
+ record.thorough_learner = false;
510
+ }
511
+ }
512
+ }
513
+
514
+ // ═══════════════════════════════════════════════════════════════════
515
+ // Hook #3: NOTES-TAKERS SUCCEED
516
+ // Hook #4: STUDY GROUP RETENTION
517
+ // Hook #7: FREE VS PAID (funnel-pre handled below)
518
+ // ═══════════════════════════════════════════════════════════════════
519
+ if (type === "everything") {
520
+ const userEvents = record;
521
+ const firstEventTime = userEvents.length > 0 ? dayjs(userEvents[0].time) : null;
522
+
523
+ // ---------------------------------------------------------------
524
+ // First pass: identify user patterns
525
+ // ---------------------------------------------------------------
526
+ let notesTakenCount = 0;
527
+ let joinedStudyGroupEarly = false;
528
+ let hasLowQuizScore = false;
529
+
530
+ userEvents.forEach((event) => {
531
+ const eventTime = dayjs(event.time);
532
+ const daysSinceStart = firstEventTime ? eventTime.diff(firstEventTime, 'days', true) : 0;
533
+
534
+ // Hook #3: Count lecture_completed events where notes_taken === true
535
+ if (event.event === "lecture completed" && event.notes_taken === true) {
536
+ notesTakenCount++;
537
+ }
538
+
539
+ // Hook #4: Check if user joined a study group within the first 10 days
540
+ if (event.event === "study group joined" && daysSinceStart <= 10) {
541
+ joinedStudyGroupEarly = true;
542
+ }
543
+
544
+ // Hook #4: Check for any quiz_completed with score < 60
545
+ if (event.event === "quiz completed" && event.score_percent < 60) {
546
+ hasLowQuizScore = true;
547
+ }
548
+ });
549
+
550
+ // ---------------------------------------------------------------
551
+ // Second pass: modify events based on patterns
552
+ // ---------------------------------------------------------------
553
+
554
+ // Hook #3: NOTES-TAKERS SUCCEED
555
+ if (notesTakenCount >= 5) {
556
+ userEvents.forEach((event, idx) => {
557
+ // Boost quiz scores for diligent note-takers
558
+ if (event.event === "quiz completed") {
559
+ if (event.score_percent !== undefined) {
560
+ event.score_percent = Math.min(100, event.score_percent + 20);
561
+ }
562
+ event.diligent_student = true;
563
+ }
564
+ });
565
+
566
+ // 40% chance to append an extra "certificate earned" event dated 1-5 days after the user's last event
567
+ if (chance.bool({ likelihood: 40 })) {
568
+ const lastEvent = userEvents[userEvents.length - 1];
569
+ if (lastEvent) {
570
+ const certEvent = {
571
+ event: "certificate earned",
572
+ time: dayjs(lastEvent.time).add(chance.integer({ min: 1, max: 5 }), 'days').toISOString(),
573
+ user_id: lastEvent.user_id,
574
+ course_id: chance.pickone(courseIds),
575
+ completion_time_days: chance.integer({ min: 14, max: 90 }),
576
+ final_grade: chance.integer({ min: 80, max: 100 }),
577
+ diligent_student: true,
578
+ };
579
+ userEvents.push(certEvent);
580
+ }
581
+ }
582
+ }
583
+
584
+ // Hook #4: STUDY GROUP RETENTION
585
+ if (!joinedStudyGroupEarly && hasLowQuizScore) {
586
+ // Non-joiners with low scores: remove 70% of events after day 14 (churn)
587
+ const churnCutoff = firstEventTime ? firstEventTime.add(14, 'days') : null;
588
+ for (let i = userEvents.length - 1; i >= 0; i--) {
589
+ const evt = userEvents[i];
590
+ if (churnCutoff && dayjs(evt.time).isAfter(churnCutoff)) {
591
+ if (chance.bool({ likelihood: 70 })) {
592
+ userEvents.splice(i, 1);
593
+ }
594
+ }
595
+ }
596
+ } else if (joinedStudyGroupEarly) {
597
+ // Study group joiners keep all events and have a 60% chance of one bonus "discussion posted" event
598
+ const lastEvent = userEvents[userEvents.length - 1];
599
+ if (lastEvent && chance.bool({ likelihood: 60 })) {
600
+ const bonusDiscussion = {
601
+ event: "discussion posted",
602
+ time: dayjs(lastEvent.time).add(chance.integer({ min: 1, max: 3 }), 'days').toISOString(),
603
+ user_id: lastEvent.user_id,
604
+ course_id: chance.pickone(courseIds),
605
+ post_type: chance.pickone(["question", "answer", "comment"]),
606
+ word_count: chance.integer({ min: 20, max: 400 }),
607
+ study_group_member: true,
608
+ };
609
+ userEvents.push(bonusDiscussion);
610
+ }
611
+ }
612
+
613
+ // Hook #8 (everything pass): Speed learners get slightly HIGHER quiz scores
614
+ let isSpeedLearner = false;
615
+ userEvents.forEach((event) => {
616
+ if (event.event === "lecture completed" && event.speed_learner === true) {
617
+ isSpeedLearner = true;
618
+ }
619
+ });
620
+
621
+ if (isSpeedLearner) {
622
+ userEvents.forEach((event) => {
623
+ if (event.event === "quiz completed") {
624
+ if (event.score_percent !== undefined) {
625
+ event.score_percent = Math.min(100, event.score_percent + 8);
626
+ event.speed_learner_effect = true;
627
+ }
628
+ }
629
+ });
630
+ }
631
+ }
632
+
633
+ // ═══════════════════════════════════════════════════════════════════
634
+ // Hook #7: FREE VS PAID COURSES (funnel-pre)
635
+ // ═══════════════════════════════════════════════════════════════════
636
+ if (type === "funnel-pre") {
637
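+ // Adjusts conversionRate by subscription status whenever meta carries a profile and a funnel; NOTE(review): no check restricts this to course-completion funnels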
638
+ if (meta && meta.profile && meta.funnel) {
639
+ const subscriptionStatus = meta.profile.subscription_status;
640
+
641
+ if (subscriptionStatus === "free") {
642
+ // Free users convert at 0.6x rate
643
+ record.conversionRate = (record.conversionRate || 0.25) * 0.6;
644
+ } else if (subscriptionStatus === "monthly" || subscriptionStatus === "annual") {
645
+ // Paid subscribers convert at 1.3x rate
646
+ record.conversionRate = (record.conversionRate || 0.25) * 1.3;
647
+ }
648
+ }
649
+ }
650
+
651
+ return record;
652
+ }
653
+ };
654
+
655
+ export default config;
656
+
657
+ /**
658
+ * ═══════════════════════════════════════════════════════════════════════════════
659
+ * NEEDLE IN A HAYSTACK - LEARNPATH ELEARNING ANALYTICS
660
+ * ═══════════════════════════════════════════════════════════════════════════════
661
+ *
662
+ * An online learning platform dungeon with 8 deliberately architected analytics
663
+ * insights hidden in the data. This dungeon simulates a real EdTech product
664
+ * (like Coursera, Khan Academy, or Udemy) and demonstrates how meaningful
665
+ * student behavior patterns can be discovered through product analytics.
666
+ *
667
+ * ═══════════════════════════════════════════════════════════════════════════════
668
+ * DATASET OVERVIEW
669
+ * ═══════════════════════════════════════════════════════════════════════════════
670
+ *
671
+ * - 5,000 users over 100 days
672
+ * - ~600K events (5,000 users x 120) across 17 event types
673
+ * - 7 funnels (onboarding, core learning loop, assessment, course completion, social learning, instructor interaction, support and monetization)
674
+ * - Group analytics (courses, study groups)
675
+ * - No lookup tables (lookupTables is empty); course, quiz, and other IDs are generated inline as event properties
676
+ * - Subscription tiers (free, monthly, annual)
677
+ *
678
+ * ═══════════════════════════════════════════════════════════════════════════════
679
+ * THE 8 ARCHITECTED HOOKS
680
+ * ═══════════════════════════════════════════════════════════════════════════════
681
+ *
682
+ * Each hook creates a specific, discoverable analytics insight that simulates
683
+ * real-world EdTech behavior patterns.
684
+ *
685
+ * ───────────────────────────────────────────────────────────────────────────────
686
+ * 1. STUDENT VS INSTRUCTOR PROFILES
687
+ * ───────────────────────────────────────────────────────────────────────────────
688
+ *
689
+ * PATTERN: User profiles are enriched based on account_type. Instructors receive
690
+ * teaching attributes (courses_created, teaching_experience_years, instructor_rating).
691
+ * Students receive learning attributes (learning_goal, study_hours_per_week).
692
+ *
693
+ * HOW TO FIND IT:
694
+ * - Segment users by: account_type = "instructor" vs "student"
695
+ * - Compare: presence of courses_created vs learning_goal properties
696
+ * - Filter profiles: instructor_rating exists (instructor-only property)
697
+ *
698
+ * EXPECTED INSIGHT: ~11% of users are instructors with teaching-specific metrics.
699
+ * Instructors should show different event patterns (more feedback given, fewer
700
+ * quizzes completed). Students show learning-goal-driven behavior differences.
701
+ *
702
+ * REAL-WORLD ANALOGUE: Two-sided marketplace profiling. Drivers vs riders in
703
+ * Uber, sellers vs buyers in eBay - each persona has unique attributes and
704
+ * behavioral patterns that require separate analysis.
705
+ *
706
+ * ───────────────────────────────────────────────────────────────────────────────
707
+ * 2. DEADLINE CRAMMING
708
+ * ───────────────────────────────────────────────────────────────────────────────
709
+ *
710
+ * PATTERN: Assignments submitted on Sundays and Mondays show deadline-rush
711
+ * behavior: 60% are late (vs ~20% baseline) and quiz scores drop by 15 points.
712
+ * These events carry is_deadline_rush: true.
713
+ *
714
+ * HOW TO FIND IT:
715
+ * - Chart: assignment_submitted by day of week
716
+ * - Compare: is_late rate by day of week
717
+ * - Compare: quiz_completed score_percent by day of week
718
+ * - Filter: is_deadline_rush = true
719
+ *
720
+ * EXPECTED INSIGHT: Clear quality drop on Sun/Mon. Late submission rate spikes
721
+ * from ~20% to ~60%. Quizzes taken on crunch days score about 15 points lower on average.
722
+ * This creates a visible "weekend dip" in student performance metrics.
723
+ *
724
+ * REAL-WORLD ANALOGUE: The "Sunday Scaries" of EdTech - students procrastinate
725
+ * and cram before Monday deadlines. Identical to real patterns seen in Coursera
726
+ * and university LMS data where submission quality drops near deadlines.
727
+ *
728
+ * ───────────────────────────────────────────────────────────────────────────────
729
+ * 3. NOTES-TAKERS SUCCEED
730
+ * ───────────────────────────────────────────────────────────────────────────────
731
+ *
732
+ * PATTERN: Students who take notes during 5 or more lecture_completed events
733
+ * receive a +20 boost to all quiz scores (capped at 100), and have a 40% chance
734
+ * of earning an extra certificate. Events are marked diligent_student: true.
735
+ *
736
+ * HOW TO FIND IT:
737
+ * - Create segment: users with 5+ lecture_completed where notes_taken = true
738
+ * - Compare: average quiz_completed score_percent
739
+ * - Compare: certificate_earned count per user
740
+ * - Filter: diligent_student = true
741
+ *
742
+ * EXPECTED INSIGHT: Diligent note-takers score ~20 points higher on quizzes
743
+ * and earn certificates at a significantly higher rate. This is a classic
744
+ * "active learning" signal visible in the data.
745
+ *
746
+ * REAL-WORLD ANALOGUE: Active engagement features (highlighting, bookmarking,
747
+ * note-taking) that correlate with better learning outcomes. Real research
748
+ * confirms note-taking improves retention by 30-40% - this hook models that.
749
+ *
750
+ * ───────────────────────────────────────────────────────────────────────────────
751
+ * 4. STUDY GROUP RETENTION
752
+ * ───────────────────────────────────────────────────────────────────────────────
753
+ *
754
+ * PATTERN: Students who join a study group within their first 10 days and have
755
+ * passing quiz scores retain normally and receive bonus discussion events. Students
756
+ * who do NOT join early AND have quiz scores below 60 experience severe churn:
757
+ * 70% of their events after day 14 are removed.
758
+ *
759
+ * HOW TO FIND IT:
760
+ * - Create cohort: users who did "study group joined" within first 10 days
761
+ * - Compare: D14/D30 retention rate vs non-joiners
762
+ * - Compare: total events per user after day 14
763
+ * - Filter: study_group_member = true on bonus events
764
+ *
765
+ * EXPECTED INSIGHT: Early study group joiners show dramatically better retention
766
+ * curves. Non-joiners with low quiz scores show a cliff-like drop in activity
767
+ * after day 14. The combination of social isolation + poor performance predicts
768
+ * churn with high accuracy.
769
+ *
770
+ * REAL-WORLD ANALOGUE: Social learning features that create accountability and
771
+ * community. MOOCs with study groups or cohort-based programs consistently show
772
+ * 3-5x higher completion rates than pure self-paced learning.
773
+ *
774
+ * ───────────────────────────────────────────────────────────────────────────────
775
+ * 5. HINT DEPENDENCY
776
+ * ───────────────────────────────────────────────────────────────────────────────
777
+ *
778
+ * PATTERN: In practice_problem_solved events, students who use hints have a 60%
779
+ * chance of having their problem difficulty set to "easy". Students who solve
780
+ * without hints have a 40% chance of tackling "hard" problems and receive
781
+ * independent_solver: true.
782
+ *
783
+ * HOW TO FIND IT:
784
+ * - Segment practice_problem_solved by: hint_used = true vs false
785
+ * - Compare: difficulty distribution (easy vs medium vs hard)
786
+ * - Filter: independent_solver = true
787
+ * - Compare: average time_to_solve_sec by hint usage
788
+ *
789
+ * EXPECTED INSIGHT: Hint users cluster on easy problems; non-hint users tackle
790
+ * harder problems. This creates a visible "hint dependency" where the scaffolding
791
+ * intended to help students actually limits their growth trajectory.
792
+ *
793
+ * REAL-WORLD ANALOGUE: The "training wheels" problem in education technology.
794
+ * Hints, auto-complete, and guided solutions can create dependency rather than
795
+ * building genuine competence. Real platforms like LeetCode and HackerRank
796
+ * observe this pattern.
797
+ *
798
+ * ───────────────────────────────────────────────────────────────────────────────
799
+ * 6. SEMESTER-END SPIKE
800
+ * ───────────────────────────────────────────────────────────────────────────────
801
+ *
802
+ * PATTERN: During days 75-85 of the dataset, quiz_started, quiz_completed, and
803
+ * assignment_submitted events have a 50% chance of being duplicated (with slightly
804
+ * offset timestamps). All events in this window carry semester_end_rush: true.
805
+ *
806
+ * HOW TO FIND IT:
807
+ * - Chart: quiz_started, quiz_completed, assignment_submitted counts by day
808
+ * - Look for: clear volume spike during days 75-85
809
+ * - Filter: semester_end_rush = true
810
+ * - Compare: event volume in days 75-85 vs days 60-74 (baseline)
811
+ *
812
+ * EXPECTED INSIGHT: Assessment activity rises by roughly 50% during the "finals"
813
+ * period. This creates a visible spike in the time series that mirrors real
814
+ * academic calendar patterns.
815
+ *
816
+ * REAL-WORLD ANALOGUE: End-of-semester, end-of-quarter, or end-of-trial
817
+ * behavior spikes. Every EdTech platform sees massive activity surges before
818
+ * deadlines, certification exams, or subscription renewal dates.
819
+ *
820
+ * ───────────────────────────────────────────────────────────────────────────────
821
+ * 7. FREE VS PAID COURSES
822
+ * ───────────────────────────────────────────────────────────────────────────────
823
+ *
824
+ * PATTERN: The Course Completion funnel conversion rate is modified by the user's
825
+ * subscription_status. Free users convert at 0.6x the base rate; monthly and
826
+ * annual subscribers convert at 1.3x. This creates a ~2.2x difference between
827
+ * free and paid users in course completion.
828
+ *
829
+ * HOW TO FIND IT:
830
+ * - Segment the Course Completion funnel by: subscription_status
831
+ * - Compare: funnel conversion rates for free vs monthly vs annual
832
+ * - Compare: certificate_earned counts by subscription_status
833
+ *
834
+ * EXPECTED INSIGHT: Paid subscribers are roughly 2x more likely to complete
835
+ * courses end-to-end. Free users drop off heavily between quiz_completed and
836
+ * certificate_earned. This mirrors the "skin in the game" effect.
837
+ *
838
+ * REAL-WORLD ANALOGUE: The well-documented correlation between payment and
839
+ * completion in online education. Paid Coursera learners complete courses at
840
+ * 5-10x the rate of free audit-track learners. Financial commitment creates
841
+ * psychological commitment.
842
+ *
843
+ * ───────────────────────────────────────────────────────────────────────────────
844
+ * 8. PLAYBACK SPEED CORRELATION
845
+ * ───────────────────────────────────────────────────────────────────────────────
846
+ *
847
+ * PATTERN: In lecture_completed events, playback speed creates two distinct
848
+ * learner segments:
849
+ * - Speed learners (>= 1.5x): get speed_learner: true, compressed watch_time
850
+ * (0.6x), and paradoxically HIGHER quiz scores (+8 points)
851
+ * - Thorough learners (<= 1.0x): get thorough_learner: true, extended watch_time
852
+ * (1.4x)
853
+ *
854
+ * HOW TO FIND IT:
855
+ * - Segment lecture_completed by: playback_speed
856
+ * - Compare: average watch_time_mins by speed bucket
857
+ * - Compare: subsequent quiz_completed score_percent
858
+ * - Filter: speed_learner = true or thorough_learner = true
859
+ * - Correlate: playback_speed with quiz performance
860
+ *
861
+ * EXPECTED INSIGHT: Counter-intuitively, speed learners score slightly higher
862
+ * on quizzes despite watching lectures faster. This suggests that playback speed
863
+ * is a proxy for prior knowledge or aptitude, not laziness.
864
+ *
865
+ * REAL-WORLD ANALOGUE: Research on lecture playback speed consistently shows
866
+ * that students who watch at 1.5-2x speed perform equally or better on assessments.
867
+ * Speed selection correlates with confidence and familiarity with the material,
868
+ * not with learning quality.
869
+ *
870
+ * ═══════════════════════════════════════════════════════════════════════════════
871
+ * ADVANCED ANALYSIS IDEAS
872
+ * ═══════════════════════════════════════════════════════════════════════════════
873
+ *
874
+ * CROSS-HOOK PATTERNS:
875
+ *
876
+ * 1. The Ideal Student: Users who:
877
+ * - Take notes consistently (Hook #3)
878
+ * - Join study groups early (Hook #4)
879
+ * - Solve problems without hints (Hook #5)
880
+ * - Have paid subscriptions (Hook #7)
881
+ * - Watch lectures at higher speed (Hook #8)
882
+ * These students should have exceptional completion rates and quiz scores.
883
+ *
884
+ * 2. The Cramming Cascade: Do deadline crammers (Hook #2) also show up
885
+ * in the semester-end spike (Hook #6)? Is the quality drop compounded?
886
+ *
887
+ * 3. Social Safety Net: Does early study group joining (Hook #4) prevent
888
+ * churn even for students who struggle on quizzes?
889
+ *
890
+ * 4. Hint-to-Mastery Pipeline: Do hint-dependent students (Hook #5) who
891
+ * later join study groups (Hook #4) eventually wean off hints?
892
+ *
893
+ * 5. Payment + Notes: Are paid subscribers (Hook #7) more likely to take
894
+ * notes (Hook #3)? Does the combination create a super-performer segment?
895
+ *
896
+ * COHORT ANALYSIS:
897
+ *
898
+ * - Cohort by education level: Do PhD students vs self-taught learners
899
+ * show different hook patterns?
900
+ * - Cohort by learning style: Do visual vs hands-on learners take more notes?
901
+ * - Cohort by platform: Do mobile (iOS/Android) users have different playback
902
+ * speed preferences than Web/iPad users?
903
+ * - Cohort by course category: Do CS students use hints more than Arts students?
904
+ *
905
+ * FUNNEL ANALYSIS:
906
+ *
907
+ * - Onboarding Funnel: How does account_type affect the register -> enroll ->
908
+ * first lecture conversion?
909
+ * - Course Completion Funnel: Compare by subscription_status, note-taking
910
+ * behavior, and study group membership
911
+ * - Practice Mastery Funnel: Compare by hint usage, playback speed, and
912
+ * learning style
913
+ *
914
+ * ═══════════════════════════════════════════════════════════════════════════════
915
+ * EXPECTED METRICS SUMMARY
916
+ * ═══════════════════════════════════════════════════════════════════════════════
917
+ *
918
+ * Hook | Metric | Baseline | Hook Effect | Ratio
919
+ * ────────────────────────|───────────────────────|──────────|──────────────|──────
920
+ * Student vs Instructor | Profile attributes | generic | role-specific| N/A
921
+ * Deadline Cramming | Late submission rate | ~20% | ~60% | 3x
922
+ * Deadline Cramming | Quiz score (Sun/Mon) | ~65 | ~50 | -15pt
923
+ * Notes-Takers Succeed | Quiz score | ~65 | ~85 | +20pt
924
+ * Notes-Takers Succeed | Certificate rate | baseline | +40% | 1.4x
925
+ * Study Group Retention | D14 retention | ~40% | ~90% | 2.3x
926
+ * Study Group Retention | Post-D14 events | 100% | 30% (churn) | 0.3x
927
+ * Hint Dependency | Easy problem rate | ~33% | ~60% | 1.8x
928
+ * Hint Dependency | Hard problem rate | ~33% | ~40% (no hint)| 1.2x
929
+ * Semester-End Spike | Assessment volume | baseline | ~1.5x | 1.5x
930
+ * Free vs Paid | Course completion | 15% | 33% | 2.2x
931
+ * Playback Speed | Quiz score (speed) | ~65 | ~73 | +8pt
932
+ *
933
+ * ═══════════════════════════════════════════════════════════════════════════════
934
+ * HOW TO RUN THIS DUNGEON
935
+ * ═══════════════════════════════════════════════════════════════════════════════
936
+ *
937
+ * From the dm4 root directory:
938
+ *
939
+ * npm start
940
+ *
941
+ * Or programmatically:
942
+ *
943
+ * import generate from './index.js';
944
+ * import config from './dungeons/harness/harness-education.js';
945
+ * const results = await generate(config);
946
+ *
947
+ * OUTPUT FILES (with writeToDisk: true, format: "json", gzip: true):
948
+ *
949
+ * - needle-haystack-education__events.json.gz - All event data
950
+ * - needle-haystack-education__user_profiles.json.gz - User profiles
951
+ * - needle-haystack-education__group_profiles.json.gz - Course & study group profiles
952
+ * - needle-haystack-education__course_id_lookup.json.gz - Course catalog
953
+ * - needle-haystack-education__quiz_id_lookup.json.gz - Quiz catalog
954
+ *
955
+ * ═══════════════════════════════════════════════════════════════════════════════
956
+ * TESTING YOUR ANALYTICS PLATFORM
957
+ * ═══════════════════════════════════════════════════════════════════════════════
958
+ *
959
+ * This dungeon is perfect for testing:
960
+ *
961
+ * 1. Segmentation: Can you separate instructor vs student behavior patterns?
962
+ * 2. Temporal Analysis: Can you detect the deadline cramming and semester-end spike?
963
+ * 3. Behavioral Correlation: Can you discover the note-taking success pattern?
964
+ * 4. Retention Analysis: Can you identify the study group retention effect?
965
+ * 5. Feature Impact: Can you measure hint dependency on problem difficulty?
966
+ * 6. Anomaly Detection: Can you automatically detect the semester-end volume spike?
967
+ * 7. Funnel Analysis: Can you quantify the free vs paid completion gap?
968
+ * 8. Counter-intuitive Insight: Can you find the speed learner paradox?
969
+ *
970
+ * ═══════════════════════════════════════════════════════════════════════════════
971
+ * WHY "NEEDLE IN A HAYSTACK"?
972
+ * ═══════════════════════════════════════════════════════════════════════════════
973
+ *
974
+ * Each hook is a "needle" - a meaningful, actionable insight hidden in a
975
+ * "haystack" of 360K events. The challenge is:
976
+ *
977
+ * 1. FINDING the needles (discovery)
978
+ * 2. VALIDATING they are real patterns (statistical significance)
979
+ * 3. UNDERSTANDING why they matter (educational impact)
980
+ * 4. ACTING on them (platform improvements)
981
+ *
982
+ * This mirrors real-world EdTech analytics: your data contains valuable insights
983
+ * about student success, but you need the right tools and skills to find them.
984
+ *
985
+ * Happy Learning!
986
+ *
987
+ * ═══════════════════════════════════════════════════════════════════════════════
988
+ */