make-mp-data 2.1.11 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -0
- package/dungeons/adspend.js +2 -2
- package/dungeons/ai-chat-analytics-ed.js +3 -2
- package/dungeons/anon.js +2 -2
- package/dungeons/array-of-object-loopup.js +181 -0
- package/dungeons/benchmark-heavy.js +241 -0
- package/dungeons/benchmark-light.js +141 -0
- package/dungeons/big.js +9 -8
- package/dungeons/business.js +2 -1
- package/dungeons/clinch-agi.js +632 -0
- package/dungeons/complex.js +3 -2
- package/dungeons/copilot.js +383 -0
- package/dungeons/ecommerce-store.js +0 -0
- package/dungeons/experiments.js +5 -4
- package/dungeons/foobar.js +1 -1
- package/dungeons/funnels.js +2 -2
- package/dungeons/gaming.js +3 -2
- package/dungeons/harness/harness-education.js +988 -0
- package/dungeons/harness/harness-fintech.js +976 -0
- package/dungeons/harness/harness-food.js +985 -0
- package/dungeons/harness/harness-gaming.js +1178 -0
- package/dungeons/harness/harness-media.js +961 -0
- package/dungeons/harness/harness-sass.js +923 -0
- package/dungeons/harness/harness-social.js +928 -0
- package/dungeons/kurby.js +211 -0
- package/dungeons/media.js +5 -4
- package/dungeons/mil.js +4 -3
- package/dungeons/mirror.js +2 -2
- package/dungeons/money2020-ed.js +8 -7
- package/dungeons/sanity.js +3 -2
- package/dungeons/scd.js +3 -2
- package/dungeons/simple.js +30 -15
- package/dungeons/strict-event-test.js +30 -0
- package/dungeons/student-teacher.js +3 -2
- package/dungeons/text-generation.js +84 -85
- package/dungeons/too-big-events.js +166 -0
- package/dungeons/uday-schema.json +220 -0
- package/dungeons/userAgent.js +4 -3
- package/index.js +41 -54
- package/lib/core/config-validator.js +122 -7
- package/lib/core/context.js +7 -14
- package/lib/core/storage.js +57 -25
- package/lib/generators/adspend.js +12 -12
- package/lib/generators/events.js +6 -5
- package/lib/generators/funnels.js +32 -10
- package/lib/generators/product-lookup.js +262 -0
- package/lib/generators/product-names.js +195 -0
- package/lib/generators/profiles.js +3 -3
- package/lib/generators/scd.js +13 -3
- package/lib/generators/text.js +17 -4
- package/lib/orchestrators/mixpanel-sender.js +244 -204
- package/lib/orchestrators/user-loop.js +54 -16
- package/lib/templates/funnels-instructions.txt +272 -0
- package/lib/templates/hook-examples.json +187 -0
- package/lib/templates/hooks-instructions.txt +295 -8
- package/lib/templates/phrases.js +473 -16
- package/lib/templates/refine-instructions.txt +485 -0
- package/lib/templates/schema-instructions.txt +239 -109
- package/lib/templates/schema.d.ts +173 -0
- package/lib/templates/verbose-schema.js +140 -206
- package/lib/utils/ai.js +853 -77
- package/lib/utils/chart.js +210 -0
- package/lib/utils/function-registry.js +285 -0
- package/lib/utils/json-evaluator.js +172 -0
- package/lib/utils/logger.js +38 -0
- package/lib/utils/mixpanel.js +101 -0
- package/lib/utils/project.js +3 -2
- package/lib/utils/utils.js +41 -4
- package/package.json +15 -21
- package/types.d.ts +15 -5
- package/lib/generators/text-bak-old.js +0 -1121
- package/lib/orchestrators/worker-manager.js +0 -203
- package/lib/templates/phrases-bak.js +0 -925
- package/lib/templates/prompt (old).txt +0 -98
- package/lib/templates/scratch-dungeon-template.js +0 -116
- package/lib/templates/textQuickTest.js +0 -172
|
@@ -0,0 +1,988 @@
|
|
|
1
|
+
import dayjs from "dayjs";
|
|
2
|
+
import utc from "dayjs/plugin/utc.js";
|
|
3
|
+
import "dotenv/config";
|
|
4
|
+
import * as u from "../../lib/utils/utils.js";
|
|
5
|
+
import * as v from "ak-tools";
|
|
6
|
+
|
|
7
|
+
const SEED = "harness-education";
|
|
8
|
+
dayjs.extend(utc);
|
|
9
|
+
const chance = u.initChance(SEED);
|
|
10
|
+
const num_users = 5_000;
|
|
11
|
+
const days = 100;
|
|
12
|
+
|
|
13
|
+
/** @typedef {import("../../types.js").Dungeon} Config */
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* NEEDLE IN A HAYSTACK - ELEARNING APP DESIGN
|
|
17
|
+
*
|
|
18
|
+
* LearnPath - An online learning platform modeled after Coursera, Khan Academy, and Udemy.
|
|
19
|
+
* The platform supports both self-paced and cohort-based learning, with a robust ecosystem
|
|
20
|
+
* of courses, quizzes, assignments, and social study features.
|
|
21
|
+
*
|
|
22
|
+
* CORE LEARNING LOOP:
|
|
23
|
+
* Students register accounts, browse and enroll in courses across multiple categories
|
|
24
|
+
* (CS, Math, Science, Business, Arts, Languages), then progress through structured
|
|
25
|
+
* modules consisting of lectures, practice problems, quizzes, and assignments. Successful
|
|
26
|
+
* completion of all requirements earns a certificate. The platform emphasizes active
|
|
27
|
+
* learning through note-taking, practice problems, and peer discussion.
|
|
28
|
+
*
|
|
29
|
+
* COURSE SYSTEM (events: course enrolled -> lecture started -> lecture completed):
|
|
30
|
+
* - Six course categories spanning technical and creative disciplines
|
|
31
|
+
* - Three difficulty tiers: beginner, intermediate, advanced
|
|
32
|
+
* - Free and paid course options (drives subscription analytics)
|
|
33
|
+
* - 150 unique courses with varying lengths, ratings, and enrollment counts
|
|
34
|
+
* - Modules (1-12 per course) contain lectures, quizzes, and assignments
|
|
35
|
+
*
|
|
36
|
+
* LECTURE EXPERIENCE (events: lecture started -> lecture completed):
|
|
37
|
+
* - Variable lecture durations (5-60 minutes) reflecting real MOOC patterns
|
|
38
|
+
* - Playback speed options (0.75x to 2.0x) reveal learning style differences
|
|
39
|
+
* - Note-taking tracking creates a behavioral signal for student diligence
|
|
40
|
+
* - Watch time vs. lecture duration measures actual engagement
|
|
41
|
+
*
|
|
42
|
+
* ASSESSMENT SYSTEM (events: quiz started -> quiz completed, assignment submitted -> graded):
|
|
43
|
+
* - Practice quizzes (low stakes, unlimited attempts) vs. graded quizzes vs. final exams
|
|
44
|
+
* - Assignments support text, code, file upload, and project submissions
|
|
45
|
+
* - Grading by instructors, peers, or auto-grader (reflects real platform patterns)
|
|
46
|
+
* - Score tracking enables learning outcome analytics
|
|
47
|
+
*
|
|
48
|
+
* PRACTICE PROBLEMS (event: practice problem solved):
|
|
49
|
+
* - Difficulty-tiered problems (easy, medium, hard) for skill building
|
|
50
|
+
* - Hint system creates a measurable dependency pattern
|
|
51
|
+
* - Time-to-solve metrics reveal mastery progression
|
|
52
|
+
* - High volume (weight: 12) reflects real platform usage patterns
|
|
53
|
+
*
|
|
54
|
+
* SOCIAL LEARNING (events: discussion posted, study group joined):
|
|
55
|
+
* - Discussion forums with questions, answers, and comments
|
|
56
|
+
* - Study groups (study circles, project teams, tutoring groups)
|
|
57
|
+
* - Social features drive retention (a key hook pattern)
|
|
58
|
+
*
|
|
59
|
+
* INSTRUCTOR ECOSYSTEM (events: instructor feedback given, assignment graded):
|
|
60
|
+
* - Instructors create courses, grade assignments, and provide feedback
|
|
61
|
+
* - Written, video, and rubric-based feedback types
|
|
62
|
+
* - Response time tracking (1-72 hours) measures instructor engagement
|
|
63
|
+
*
|
|
64
|
+
* MONETIZATION (event: subscription purchased):
|
|
65
|
+
* - Three tiers: monthly ($19.99), annual ($149.99), lifetime ($499.99)
|
|
66
|
+
* - Free tier with limited access (most users)
|
|
67
|
+
* - Subscription status affects course completion funnels (Hook #7)
|
|
68
|
+
*
|
|
69
|
+
* SUPPORT SYSTEM (event: help requested):
|
|
70
|
+
* - Four topic categories: technical, content, billing, accessibility
|
|
71
|
+
* - Three channels: chat, email, forum
|
|
72
|
+
* - Tracks student friction points
|
|
73
|
+
*
|
|
74
|
+
* COURSE REVIEWS (event: course reviewed):
|
|
75
|
+
* - 1-5 star ratings with written reviews
|
|
76
|
+
* - Would-recommend boolean for NPS-style analysis
|
|
77
|
+
* - Review length correlates with sentiment strength
|
|
78
|
+
*
|
|
79
|
+
* WHY THESE EVENTS/PROPERTIES?
|
|
80
|
+
* - Events model the complete student lifecycle: onboarding -> engagement -> mastery -> certification
|
|
81
|
+
* - Properties enable cohort analysis: learning style, education level, account type, subscription status
|
|
82
|
+
* - Funnels reveal friction: where do students drop off in onboarding, course completion, practice mastery?
|
|
83
|
+
* - Behavioral signals (notes, hints, playback speed, study groups) create discoverable skill gaps
|
|
84
|
+
* - Social features (study groups, discussions) and monetization (subscriptions) drive business metrics
|
|
85
|
+
* - The "needle in haystack" hooks simulate real EdTech insights hidden in production data
|
|
86
|
+
*/
|
|
87
|
+
|
|
88
|
+
// Pre-build the ID pools that event properties draw from (the hook also picks
// from courseIds when it synthesizes events).
// Every pool is `v.range(1, upperBound)` mapped to "<prefix>_<v.uid(6)>".
// NOTE(review): v.uid comes from ak-tools, not the seeded `chance` instance —
// confirm whether these IDs are expected to be reproducible across runs.
const buildIdPool = (prefix, upperBound) => v.range(1, upperBound).map(() => `${prefix}_${v.uid(6)}`);

const courseIds = buildIdPool("course", 151);
const quizIds = buildIdPool("quiz", 401);
const groupIds = buildIdPool("group", 301);
const lectureIds = buildIdPool("lecture", 501);
const assignmentIds = buildIdPool("assignment", 201);
const problemIds = buildIdPool("problem", 601);
|
|
95
|
+
|
|
96
|
+
/** @type {Config} */
|
|
97
|
+
const config = {
|
|
98
|
+
token: "10a2fd5d566edd19e803036b276fe91b",
|
|
99
|
+
seed: SEED,
|
|
100
|
+
numDays: days,
|
|
101
|
+
numEvents: num_users * 120,
|
|
102
|
+
numUsers: num_users,
|
|
103
|
+
hasAnonIds: false,
|
|
104
|
+
hasSessionIds: true,
|
|
105
|
+
format: "json",
|
|
106
|
+
gzip: true,
|
|
107
|
+
alsoInferFunnels: false,
|
|
108
|
+
hasLocation: true,
|
|
109
|
+
hasAndroidDevices: true,
|
|
110
|
+
hasIOSDevices: true,
|
|
111
|
+
hasDesktopDevices: true,
|
|
112
|
+
hasBrowser: false,
|
|
113
|
+
hasCampaigns: false,
|
|
114
|
+
isAnonymous: false,
|
|
115
|
+
hasAdSpend: false,
|
|
116
|
+
percentUsersBornInDataset: 50,
|
|
117
|
+
hasAvatar: true,
|
|
118
|
+
makeChart: false,
|
|
119
|
+
batchSize: 2_500_000,
|
|
120
|
+
concurrency: 10,
|
|
121
|
+
writeToDisk: false,
|
|
122
|
+
|
|
123
|
+
funnels: [
|
|
124
|
+
{
|
|
125
|
+
sequence: ["account registered", "course enrolled", "lecture started"],
|
|
126
|
+
isFirstFunnel: true,
|
|
127
|
+
conversionRate: 75,
|
|
128
|
+
timeToConvert: 1,
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
// Core learning loop: students watch lectures and do practice problems constantly
|
|
132
|
+
sequence: ["lecture started", "lecture completed", "practice problem solved"],
|
|
133
|
+
conversionRate: 70,
|
|
134
|
+
timeToConvert: 4,
|
|
135
|
+
weight: 5,
|
|
136
|
+
},
|
|
137
|
+
{
|
|
138
|
+
// Assessment flow: quizzes and assignments after studying
|
|
139
|
+
sequence: ["quiz started", "quiz completed", "assignment submitted"],
|
|
140
|
+
conversionRate: 55,
|
|
141
|
+
timeToConvert: 8,
|
|
142
|
+
weight: 3,
|
|
143
|
+
},
|
|
144
|
+
{
|
|
145
|
+
// Course completion journey: enroll → complete → earn certificate
|
|
146
|
+
sequence: ["course enrolled", "lecture completed", "quiz completed", "certificate earned"],
|
|
147
|
+
conversionRate: 30,
|
|
148
|
+
timeToConvert: 48,
|
|
149
|
+
weight: 2,
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
// Social learning: discussions and study groups
|
|
153
|
+
sequence: ["discussion posted", "study group joined", "resource downloaded"],
|
|
154
|
+
conversionRate: 50,
|
|
155
|
+
timeToConvert: 12,
|
|
156
|
+
weight: 2,
|
|
157
|
+
},
|
|
158
|
+
{
|
|
159
|
+
// Instructor interaction loop
|
|
160
|
+
sequence: ["assignment submitted", "assignment graded", "instructor feedback given"],
|
|
161
|
+
conversionRate: 45,
|
|
162
|
+
timeToConvert: 24,
|
|
163
|
+
weight: 2,
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
// Support and monetization
|
|
167
|
+
sequence: ["help requested", "subscription purchased", "course reviewed"],
|
|
168
|
+
conversionRate: 35,
|
|
169
|
+
timeToConvert: 24,
|
|
170
|
+
weight: 1,
|
|
171
|
+
},
|
|
172
|
+
],
|
|
173
|
+
|
|
174
|
+
events: [
|
|
175
|
+
{
|
|
176
|
+
event: "account registered",
|
|
177
|
+
weight: 1,
|
|
178
|
+
isFirstEvent: true,
|
|
179
|
+
properties: {
|
|
180
|
+
"account_type": u.pickAWinner(["student", "instructor"], 0.15),
|
|
181
|
+
"signup_source": u.pickAWinner(["organic", "referral", "school_partnership", "social_ad"]),
|
|
182
|
+
}
|
|
183
|
+
},
|
|
184
|
+
{
|
|
185
|
+
event: "course enrolled",
|
|
186
|
+
weight: 8,
|
|
187
|
+
properties: {
|
|
188
|
+
"course_id": u.pickAWinner(courseIds),
|
|
189
|
+
"course_category": u.pickAWinner(["CS", "Math", "Science", "Business", "Arts", "Languages"]),
|
|
190
|
+
"difficulty": u.pickAWinner(["beginner", "intermediate", "advanced"]),
|
|
191
|
+
"is_free": u.pickAWinner([true, false], 0.4),
|
|
192
|
+
}
|
|
193
|
+
},
|
|
194
|
+
{
|
|
195
|
+
event: "lecture started",
|
|
196
|
+
weight: 18,
|
|
197
|
+
properties: {
|
|
198
|
+
"course_id": u.pickAWinner(courseIds),
|
|
199
|
+
"lecture_id": u.pickAWinner(lectureIds),
|
|
200
|
+
"lecture_duration_mins": u.weighNumRange(5, 60, 0.8, 20),
|
|
201
|
+
"module_number": u.weighNumRange(1, 12),
|
|
202
|
+
}
|
|
203
|
+
},
|
|
204
|
+
{
|
|
205
|
+
event: "lecture completed",
|
|
206
|
+
weight: 14,
|
|
207
|
+
properties: {
|
|
208
|
+
"course_id": u.pickAWinner(courseIds),
|
|
209
|
+
"lecture_id": u.pickAWinner(lectureIds),
|
|
210
|
+
"watch_time_mins": u.weighNumRange(3, 60, 0.8, 20),
|
|
211
|
+
"playback_speed": u.pickAWinner([0.75, 1.0, 1.0, 1.0, 1.25, 1.5, 2.0]),
|
|
212
|
+
"notes_taken": u.pickAWinner([true, false], 0.35),
|
|
213
|
+
}
|
|
214
|
+
},
|
|
215
|
+
{
|
|
216
|
+
event: "quiz started",
|
|
217
|
+
weight: 10,
|
|
218
|
+
properties: {
|
|
219
|
+
"course_id": u.pickAWinner(courseIds),
|
|
220
|
+
"quiz_id": u.pickAWinner(quizIds),
|
|
221
|
+
"quiz_type": u.pickAWinner(["practice", "graded", "final_exam"]),
|
|
222
|
+
"question_count": u.weighNumRange(5, 50, 0.7, 15),
|
|
223
|
+
}
|
|
224
|
+
},
|
|
225
|
+
{
|
|
226
|
+
event: "quiz completed",
|
|
227
|
+
weight: 8,
|
|
228
|
+
properties: {
|
|
229
|
+
"course_id": u.pickAWinner(courseIds),
|
|
230
|
+
"quiz_id": u.pickAWinner(quizIds),
|
|
231
|
+
"score_percent": u.weighNumRange(0, 100, 1.2, 50),
|
|
232
|
+
"time_spent_mins": u.weighNumRange(3, 120, 0.6, 25),
|
|
233
|
+
"attempts": u.weighNumRange(1, 5, 0.5, 3),
|
|
234
|
+
}
|
|
235
|
+
},
|
|
236
|
+
{
|
|
237
|
+
event: "assignment submitted",
|
|
238
|
+
weight: 6,
|
|
239
|
+
properties: {
|
|
240
|
+
"course_id": u.pickAWinner(courseIds),
|
|
241
|
+
"assignment_id": u.pickAWinner(assignmentIds),
|
|
242
|
+
"submission_type": u.pickAWinner(["text", "code", "file", "project"]),
|
|
243
|
+
"word_count": u.weighNumRange(100, 5000, 0.6, 500),
|
|
244
|
+
"is_late": u.pickAWinner([true, false], 0.2),
|
|
245
|
+
}
|
|
246
|
+
},
|
|
247
|
+
{
|
|
248
|
+
event: "assignment graded",
|
|
249
|
+
weight: 5,
|
|
250
|
+
properties: {
|
|
251
|
+
"course_id": u.pickAWinner(courseIds),
|
|
252
|
+
"assignment_id": u.pickAWinner(assignmentIds),
|
|
253
|
+
"grade": u.pickAWinner(["A", "B", "C", "D", "F"]),
|
|
254
|
+
"feedback_length": u.weighNumRange(0, 500, 0.5, 100),
|
|
255
|
+
"grader": u.pickAWinner(["instructor", "peer", "auto"]),
|
|
256
|
+
}
|
|
257
|
+
},
|
|
258
|
+
{
|
|
259
|
+
event: "discussion posted",
|
|
260
|
+
weight: 7,
|
|
261
|
+
properties: {
|
|
262
|
+
"course_id": u.pickAWinner(courseIds),
|
|
263
|
+
"post_type": u.pickAWinner(["question", "answer", "comment"]),
|
|
264
|
+
"word_count": u.weighNumRange(10, 500, 0.6, 80),
|
|
265
|
+
}
|
|
266
|
+
},
|
|
267
|
+
{
|
|
268
|
+
event: "certificate earned",
|
|
269
|
+
weight: 2,
|
|
270
|
+
properties: {
|
|
271
|
+
"course_id": u.pickAWinner(courseIds),
|
|
272
|
+
"completion_time_days": u.weighNumRange(7, 180, 0.5, 45),
|
|
273
|
+
"final_grade": u.weighNumRange(60, 100, 1.2, 30),
|
|
274
|
+
}
|
|
275
|
+
},
|
|
276
|
+
{
|
|
277
|
+
event: "study group joined",
|
|
278
|
+
weight: 4,
|
|
279
|
+
properties: {
|
|
280
|
+
"group_id": u.pickAWinner(groupIds),
|
|
281
|
+
"group_size": u.weighNumRange(3, 20, 0.7, 8),
|
|
282
|
+
"group_type": u.pickAWinner(["study_circle", "project_team", "tutoring"]),
|
|
283
|
+
}
|
|
284
|
+
},
|
|
285
|
+
{
|
|
286
|
+
event: "resource downloaded",
|
|
287
|
+
weight: 9,
|
|
288
|
+
properties: {
|
|
289
|
+
"resource_type": u.pickAWinner(["pdf", "slides", "code_sample", "dataset", "cheat_sheet"]),
|
|
290
|
+
"course_id": u.pickAWinner(courseIds),
|
|
291
|
+
}
|
|
292
|
+
},
|
|
293
|
+
{
|
|
294
|
+
event: "instructor feedback given",
|
|
295
|
+
weight: 3,
|
|
296
|
+
properties: {
|
|
297
|
+
"course_id": u.pickAWinner(courseIds),
|
|
298
|
+
"feedback_type": u.pickAWinner(["written", "video", "rubric"]),
|
|
299
|
+
"response_time_hours": u.weighNumRange(1, 72, 0.5, 15),
|
|
300
|
+
}
|
|
301
|
+
},
|
|
302
|
+
{
|
|
303
|
+
event: "course reviewed",
|
|
304
|
+
weight: 3,
|
|
305
|
+
properties: {
|
|
306
|
+
"course_id": u.pickAWinner(courseIds),
|
|
307
|
+
"rating": u.weighNumRange(1, 5, 1.5, 3),
|
|
308
|
+
"review_length": u.weighNumRange(10, 1000, 0.5, 100),
|
|
309
|
+
"would_recommend": u.pickAWinner([true, false], 0.7),
|
|
310
|
+
}
|
|
311
|
+
},
|
|
312
|
+
{
|
|
313
|
+
event: "subscription purchased",
|
|
314
|
+
weight: 2,
|
|
315
|
+
properties: {
|
|
316
|
+
"plan": u.pickAWinner(["monthly", "annual", "lifetime"]),
|
|
317
|
+
"price": u.pickAWinner([19.99, 149.99, 499.99]),
|
|
318
|
+
}
|
|
319
|
+
},
|
|
320
|
+
{
|
|
321
|
+
event: "help requested",
|
|
322
|
+
weight: 4,
|
|
323
|
+
properties: {
|
|
324
|
+
"topic": u.pickAWinner(["technical", "content", "billing", "accessibility"]),
|
|
325
|
+
"channel": u.pickAWinner(["chat", "email", "forum"]),
|
|
326
|
+
}
|
|
327
|
+
},
|
|
328
|
+
{
|
|
329
|
+
event: "practice problem solved",
|
|
330
|
+
weight: 12,
|
|
331
|
+
properties: {
|
|
332
|
+
"course_id": u.pickAWinner(courseIds),
|
|
333
|
+
"problem_id": u.pickAWinner(problemIds),
|
|
334
|
+
"difficulty": u.pickAWinner(["easy", "medium", "hard"]),
|
|
335
|
+
"time_to_solve_sec": u.weighNumRange(10, 3600, 0.5, 300),
|
|
336
|
+
"hint_used": u.pickAWinner([true, false], 0.35),
|
|
337
|
+
}
|
|
338
|
+
},
|
|
339
|
+
],
|
|
340
|
+
|
|
341
|
+
superProps: {
|
|
342
|
+
platform: u.pickAWinner(["Web", "iOS", "Android", "iPad"]),
|
|
343
|
+
subscription_status: u.pickAWinner(["free", "free", "free", "monthly", "annual"]),
|
|
344
|
+
},
|
|
345
|
+
|
|
346
|
+
scdProps: {},
|
|
347
|
+
|
|
348
|
+
userProps: {
|
|
349
|
+
"account_type": u.pickAWinner(["student", "student", "student", "student", "student", "student", "student", "student", "instructor"]),
|
|
350
|
+
"learning_style": u.pickAWinner(["visual", "reading", "hands_on", "auditory"]),
|
|
351
|
+
"education_level": u.pickAWinner(["high_school", "bachelors", "masters", "phd", "self_taught"]),
|
|
352
|
+
"timezone": u.pickAWinner(["US_Eastern", "US_Pacific", "US_Central", "Europe", "Asia"]),
|
|
353
|
+
},
|
|
354
|
+
|
|
355
|
+
groupKeys: [
|
|
356
|
+
["course_id", 150, ["course enrolled", "lecture started", "lecture completed", "quiz completed", "certificate earned"]],
|
|
357
|
+
["group_id", 300, ["study group joined", "discussion posted"]],
|
|
358
|
+
],
|
|
359
|
+
|
|
360
|
+
groupProps: {
|
|
361
|
+
course_id: {
|
|
362
|
+
"title": () => `${chance.pickone(["Introduction to", "Advanced", "Mastering", "Fundamentals of", "Applied"])} ${chance.pickone(["Algorithms", "Data Science", "Machine Learning", "Statistics", "Web Development", "Calculus", "Biology", "Economics", "Design Thinking", "Creative Writing"])}`,
|
|
363
|
+
"instructor_count": u.weighNumRange(1, 5, 0.5, 2),
|
|
364
|
+
"total_enrolled": u.weighNumRange(50, 5000, 0.6, 500),
|
|
365
|
+
"avg_rating": u.weighNumRange(3, 5, 1.5, 1),
|
|
366
|
+
},
|
|
367
|
+
group_id: {
|
|
368
|
+
"name": () => `${chance.pickone(["Study", "Learning", "Focus", "Peer", "Cohort"])} ${chance.pickone(["Circle", "Squad", "Team", "Hub", "Group"])} ${chance.character({ alpha: true, casing: "upper" })}${chance.integer({ min: 1, max: 99 })}`,
|
|
369
|
+
"member_count": u.weighNumRange(3, 20, 0.7, 8),
|
|
370
|
+
"focus_area": u.pickAWinner(["CS", "Math", "Science", "Business", "Arts", "Languages"]),
|
|
371
|
+
}
|
|
372
|
+
},
|
|
373
|
+
|
|
374
|
+
lookupTables: [],
|
|
375
|
+
|
|
376
|
+
/**
|
|
377
|
+
* ARCHITECTED ANALYTICS HOOKS
|
|
378
|
+
*
|
|
379
|
+
* This hook function creates 8 deliberate patterns in the data:
|
|
380
|
+
*
|
|
381
|
+
* 1. STUDENT VS INSTRUCTOR PROFILES: Instructor profiles get teaching attributes; students get learning attributes
|
|
382
|
+
* 2. DEADLINE CRAMMING: Assignments submitted on Sun/Mon are rushed and lower quality
|
|
383
|
+
* 3. NOTES-TAKERS SUCCEED: Students who take notes during lectures score higher on quizzes
|
|
384
|
+
* 4. STUDY GROUP RETENTION: Early study group joiners retain; non-joiners with low scores churn
|
|
385
|
+
* 5. HINT DEPENDENCY: Hint users get locked into easy problems; non-hint users tackle harder ones
|
|
386
|
+
* 6. SEMESTER-END SPIKE: Days 75-85 see doubled assessment activity (cramming period)
|
|
387
|
+
* 7. FREE VS PAID COURSES: Paid subscribers convert through Course Completion funnel at ~2.2x rate
|
|
388
|
+
* 8. PLAYBACK SPEED CORRELATION: Speed learners paradoxically score higher; thorough learners get extended time
|
|
389
|
+
*/
|
|
390
|
+
hook: function (record, type, meta) {
|
|
391
|
+
const NOW = dayjs();
|
|
392
|
+
const DATASET_START = NOW.subtract(days, 'days');
|
|
393
|
+
|
|
394
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
395
|
+
// Hook #1: STUDENT VS INSTRUCTOR PROFILES
|
|
396
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
397
|
+
if (type === "user") {
|
|
398
|
+
if (record.account_type === "instructor") {
|
|
399
|
+
record.courses_created = chance.integer({ min: 1, max: 15 });
|
|
400
|
+
record.teaching_experience_years = chance.integer({ min: 1, max: 20 });
|
|
401
|
+
record.instructor_rating = Math.round((chance.floating({ min: 3.0, max: 5.0 }) + Number.EPSILON) * 100) / 100;
|
|
402
|
+
} else {
|
|
403
|
+
record.learning_goal = chance.pickone(["career_change", "skill_upgrade", "hobby", "degree_requirement"]);
|
|
404
|
+
record.study_hours_per_week = chance.integer({ min: 2, max: 30 });
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
409
|
+
// Hook #2: DEADLINE CRAMMING
|
|
410
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
411
|
+
if (type === "event") {
|
|
412
|
+
if (record.event === "assignment submitted" && record.time) {
|
|
413
|
+
const eventDay = dayjs(record.time).day(); // 0 = Sunday, 1 = Monday
|
|
414
|
+
if (eventDay === 0 || eventDay === 1) {
|
|
415
|
+
record.is_deadline_rush = true;
|
|
416
|
+
// 60% chance of being late (only 40% on time)
|
|
417
|
+
record.is_late = !chance.bool({ likelihood: 40 });
|
|
418
|
+
} else {
|
|
419
|
+
record.is_deadline_rush = false;
|
|
420
|
+
record.is_late = !chance.bool({ likelihood: 80 });
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
if (record.event === "quiz completed" && record.time) {
|
|
425
|
+
const eventDay = dayjs(record.time).day();
|
|
426
|
+
if (eventDay === 0 || eventDay === 1) {
|
|
427
|
+
if (record.score_percent !== undefined) {
|
|
428
|
+
record.score_percent = Math.max(0, record.score_percent - 15);
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
435
|
+
// Hook #5: HINT DEPENDENCY
|
|
436
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
437
|
+
if (type === "event") {
|
|
438
|
+
if (record.event === "practice problem solved") {
|
|
439
|
+
if (record.hint_used === true) {
|
|
440
|
+
// Hint users gravitate toward easy problems
|
|
441
|
+
if (chance.bool({ likelihood: 60 })) {
|
|
442
|
+
record.difficulty = "easy";
|
|
443
|
+
}
|
|
444
|
+
record.independent_solver = false;
|
|
445
|
+
} else if (record.hint_used === false) {
|
|
446
|
+
// Independent solvers tackle harder problems
|
|
447
|
+
if (chance.bool({ likelihood: 40 })) {
|
|
448
|
+
record.difficulty = "hard";
|
|
449
|
+
record.independent_solver = true;
|
|
450
|
+
} else {
|
|
451
|
+
record.independent_solver = false;
|
|
452
|
+
}
|
|
453
|
+
} else {
|
|
454
|
+
record.independent_solver = false;
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
460
|
+
// Hook #6: SEMESTER-END SPIKE
|
|
461
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
462
|
+
if (type === "event") {
|
|
463
|
+
if (record.time) {
|
|
464
|
+
const eventTime = dayjs(record.time);
|
|
465
|
+
const dayInDataset = eventTime.diff(DATASET_START, 'days', true);
|
|
466
|
+
|
|
467
|
+
const spikableEvents = ["quiz started", "quiz completed", "assignment submitted"];
|
|
468
|
+
if (spikableEvents.includes(record.event)) {
|
|
469
|
+
if (dayInDataset >= 75 && dayInDataset <= 85) {
|
|
470
|
+
record.semester_end_rush = true;
|
|
471
|
+
|
|
472
|
+
// 50% chance to duplicate
|
|
473
|
+
if (chance.bool({ likelihood: 50 })) {
|
|
474
|
+
const duplicate = JSON.parse(JSON.stringify(record));
|
|
475
|
+
duplicate.time = eventTime.add(chance.integer({ min: 5, max: 120 }), 'minutes').toISOString();
|
|
476
|
+
duplicate.semester_end_rush = true;
|
|
477
|
+
return [record, duplicate];
|
|
478
|
+
}
|
|
479
|
+
} else {
|
|
480
|
+
record.semester_end_rush = false;
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
487
|
+
// Hook #8: PLAYBACK SPEED CORRELATION
|
|
488
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
489
|
+
if (type === "event") {
|
|
490
|
+
if (record.event === "lecture completed") {
|
|
491
|
+
const speed = record.playback_speed;
|
|
492
|
+
|
|
493
|
+
if (speed >= 1.5) {
|
|
494
|
+
record.speed_learner = true;
|
|
495
|
+
record.thorough_learner = false;
|
|
496
|
+
// Compress watch time for speed learners
|
|
497
|
+
if (record.watch_time_mins !== undefined) {
|
|
498
|
+
record.watch_time_mins = Math.max(3, Math.floor(record.watch_time_mins * 0.6));
|
|
499
|
+
}
|
|
500
|
+
} else if (speed !== undefined && speed <= 1.0) {
|
|
501
|
+
record.speed_learner = false;
|
|
502
|
+
record.thorough_learner = true;
|
|
503
|
+
// Extend watch time for thorough learners
|
|
504
|
+
if (record.watch_time_mins !== undefined) {
|
|
505
|
+
record.watch_time_mins = Math.min(90, Math.floor(record.watch_time_mins * 1.4));
|
|
506
|
+
}
|
|
507
|
+
} else {
|
|
508
|
+
record.speed_learner = false;
|
|
509
|
+
record.thorough_learner = false;
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
515
|
+
// Hook #3: NOTES-TAKERS SUCCEED
|
|
516
|
+
// Hook #4: STUDY GROUP RETENTION
|
|
517
|
+
// Hook #7: FREE VS PAID (funnel-pre handled below)
|
|
518
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
519
|
+
if (type === "everything") {
|
|
520
|
+
const userEvents = record;
|
|
521
|
+
const firstEventTime = userEvents.length > 0 ? dayjs(userEvents[0].time) : null;
|
|
522
|
+
|
|
523
|
+
// ---------------------------------------------------------------
|
|
524
|
+
// First pass: identify user patterns
|
|
525
|
+
// ---------------------------------------------------------------
|
|
526
|
+
let notesTakenCount = 0;
|
|
527
|
+
let joinedStudyGroupEarly = false;
|
|
528
|
+
let hasLowQuizScore = false;
|
|
529
|
+
|
|
530
|
+
userEvents.forEach((event) => {
|
|
531
|
+
const eventTime = dayjs(event.time);
|
|
532
|
+
const daysSinceStart = firstEventTime ? eventTime.diff(firstEventTime, 'days', true) : 0;
|
|
533
|
+
|
|
534
|
+
// Hook #3: Count lecture_completed events where notes_taken === true
|
|
535
|
+
if (event.event === "lecture completed" && event.notes_taken === true) {
|
|
536
|
+
notesTakenCount++;
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
// Hook #4: Check if user joined a study group within the first 10 days
|
|
540
|
+
if (event.event === "study group joined" && daysSinceStart <= 10) {
|
|
541
|
+
joinedStudyGroupEarly = true;
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
// Hook #4: Check for any quiz_completed with score < 60
|
|
545
|
+
if (event.event === "quiz completed" && event.score_percent < 60) {
|
|
546
|
+
hasLowQuizScore = true;
|
|
547
|
+
}
|
|
548
|
+
});
|
|
549
|
+
|
|
550
|
+
// ---------------------------------------------------------------
|
|
551
|
+
// Second pass: modify events based on patterns
|
|
552
|
+
// ---------------------------------------------------------------
|
|
553
|
+
|
|
554
|
+
// Hook #3: NOTES-TAKERS SUCCEED
|
|
555
|
+
if (notesTakenCount >= 5) {
|
|
556
|
+
userEvents.forEach((event, idx) => {
|
|
557
|
+
// Boost quiz scores for diligent note-takers
|
|
558
|
+
if (event.event === "quiz completed") {
|
|
559
|
+
if (event.score_percent !== undefined) {
|
|
560
|
+
event.score_percent = Math.min(100, event.score_percent + 20);
|
|
561
|
+
}
|
|
562
|
+
event.diligent_student = true;
|
|
563
|
+
}
|
|
564
|
+
});
|
|
565
|
+
|
|
566
|
+
// 40% chance to splice in an extra certificate_earned event
|
|
567
|
+
if (chance.bool({ likelihood: 40 })) {
|
|
568
|
+
const lastEvent = userEvents[userEvents.length - 1];
|
|
569
|
+
if (lastEvent) {
|
|
570
|
+
const certEvent = {
|
|
571
|
+
event: "certificate earned",
|
|
572
|
+
time: dayjs(lastEvent.time).add(chance.integer({ min: 1, max: 5 }), 'days').toISOString(),
|
|
573
|
+
user_id: lastEvent.user_id,
|
|
574
|
+
course_id: chance.pickone(courseIds),
|
|
575
|
+
completion_time_days: chance.integer({ min: 14, max: 90 }),
|
|
576
|
+
final_grade: chance.integer({ min: 80, max: 100 }),
|
|
577
|
+
diligent_student: true,
|
|
578
|
+
};
|
|
579
|
+
userEvents.push(certEvent);
|
|
580
|
+
}
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
|
|
584
|
+
// Hook #4: STUDY GROUP RETENTION
|
|
585
|
+
if (!joinedStudyGroupEarly && hasLowQuizScore) {
|
|
586
|
+
// Non-joiners with low scores: remove 70% of events after day 14 (churn)
|
|
587
|
+
const churnCutoff = firstEventTime ? firstEventTime.add(14, 'days') : null;
|
|
588
|
+
for (let i = userEvents.length - 1; i >= 0; i--) {
|
|
589
|
+
const evt = userEvents[i];
|
|
590
|
+
if (churnCutoff && dayjs(evt.time).isAfter(churnCutoff)) {
|
|
591
|
+
if (chance.bool({ likelihood: 70 })) {
|
|
592
|
+
userEvents.splice(i, 1);
|
|
593
|
+
}
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
} else if (joinedStudyGroupEarly) {
|
|
597
|
+
// Study group joiners keep all events and get bonus discussion_posted events
|
|
598
|
+
const lastEvent = userEvents[userEvents.length - 1];
|
|
599
|
+
if (lastEvent && chance.bool({ likelihood: 60 })) {
|
|
600
|
+
const bonusDiscussion = {
|
|
601
|
+
event: "discussion posted",
|
|
602
|
+
time: dayjs(lastEvent.time).add(chance.integer({ min: 1, max: 3 }), 'days').toISOString(),
|
|
603
|
+
user_id: lastEvent.user_id,
|
|
604
|
+
course_id: chance.pickone(courseIds),
|
|
605
|
+
post_type: chance.pickone(["question", "answer", "comment"]),
|
|
606
|
+
word_count: chance.integer({ min: 20, max: 400 }),
|
|
607
|
+
study_group_member: true,
|
|
608
|
+
};
|
|
609
|
+
userEvents.push(bonusDiscussion);
|
|
610
|
+
}
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
// Hook #8 (everything pass): Speed learners get slightly HIGHER quiz scores
|
|
614
|
+
let isSpeedLearner = false;
|
|
615
|
+
userEvents.forEach((event) => {
|
|
616
|
+
if (event.event === "lecture completed" && event.speed_learner === true) {
|
|
617
|
+
isSpeedLearner = true;
|
|
618
|
+
}
|
|
619
|
+
});
|
|
620
|
+
|
|
621
|
+
if (isSpeedLearner) {
|
|
622
|
+
userEvents.forEach((event) => {
|
|
623
|
+
if (event.event === "quiz completed") {
|
|
624
|
+
if (event.score_percent !== undefined) {
|
|
625
|
+
event.score_percent = Math.min(100, event.score_percent + 8);
|
|
626
|
+
event.speed_learner_effect = true;
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
});
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
634
|
+
// Hook #7: FREE VS PAID COURSES (funnel-pre)
|
|
635
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
636
|
+
if (type === "funnel-pre") {
|
|
637
|
+
// Target funnels containing course completion events
|
|
638
|
+
if (meta && meta.profile && meta.funnel) {
|
|
639
|
+
const subscriptionStatus = meta.profile.subscription_status;
|
|
640
|
+
|
|
641
|
+
if (subscriptionStatus === "free") {
|
|
642
|
+
// Free users convert at 0.6x rate
|
|
643
|
+
record.conversionRate = (record.conversionRate || 0.25) * 0.6;
|
|
644
|
+
} else if (subscriptionStatus === "monthly" || subscriptionStatus === "annual") {
|
|
645
|
+
// Paid subscribers convert at 1.3x rate
|
|
646
|
+
record.conversionRate = (record.conversionRate || 0.25) * 1.3;
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
|
|
651
|
+
return record;
|
|
652
|
+
}
|
|
653
|
+
};
|
|
654
|
+
|
|
655
|
+
// Default export: the dungeon configuration object, intended to be passed to generate(config).
export default config;
|
|
656
|
+
|
|
657
|
+
/**
|
|
658
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
659
|
+
* NEEDLE IN A HAYSTACK - LEARNPATH ELEARNING ANALYTICS
|
|
660
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
661
|
+
*
|
|
662
|
+
* An online learning platform dungeon with 8 deliberately architected analytics
|
|
663
|
+
* insights hidden in the data. This dungeon simulates a real EdTech product
|
|
664
|
+
* (like Coursera, Khan Academy, or Udemy) and demonstrates how meaningful
|
|
665
|
+
* student behavior patterns can be discovered through product analytics.
|
|
666
|
+
*
|
|
667
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
668
|
+
* DATASET OVERVIEW
|
|
669
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
670
|
+
*
|
|
671
|
+
* - 5,000 users over 100 days
|
|
672
|
+
* - 360K events across 17 event types
|
|
673
|
+
* - 3 funnels (student onboarding, course completion, practice mastery)
|
|
674
|
+
* - Group analytics (courses, study groups)
|
|
675
|
+
* - Lookup tables (courses, quizzes)
|
|
676
|
+
* - Subscription tiers (free, monthly, annual)
|
|
677
|
+
*
|
|
678
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
679
|
+
* THE 8 ARCHITECTED HOOKS
|
|
680
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
681
|
+
*
|
|
682
|
+
* Each hook creates a specific, discoverable analytics insight that simulates
|
|
683
|
+
* real-world EdTech behavior patterns.
|
|
684
|
+
*
|
|
685
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
686
|
+
* 1. STUDENT VS INSTRUCTOR PROFILES
|
|
687
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
688
|
+
*
|
|
689
|
+
* PATTERN: User profiles are enriched based on account_type. Instructors receive
|
|
690
|
+
* teaching attributes (courses_created, teaching_experience_years, instructor_rating).
|
|
691
|
+
* Students receive learning attributes (learning_goal, study_hours_per_week).
|
|
692
|
+
*
|
|
693
|
+
* HOW TO FIND IT:
|
|
694
|
+
* - Segment users by: account_type = "instructor" vs "student"
|
|
695
|
+
* - Compare: presence of courses_created vs learning_goal properties
|
|
696
|
+
* - Filter profiles: instructor_rating exists (instructor-only property)
|
|
697
|
+
*
|
|
698
|
+
* EXPECTED INSIGHT: ~11% of users are instructors with teaching-specific metrics.
|
|
699
|
+
* Instructors should show different event patterns (more feedback given, fewer
|
|
700
|
+
* quizzes completed). Students show learning-goal-driven behavior differences.
|
|
701
|
+
*
|
|
702
|
+
* REAL-WORLD ANALOGUE: Two-sided marketplace profiling. Drivers vs riders in
|
|
703
|
+
* Uber, sellers vs buyers in eBay - each persona has unique attributes and
|
|
704
|
+
* behavioral patterns that require separate analysis.
|
|
705
|
+
*
|
|
706
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
707
|
+
* 2. DEADLINE CRAMMING
|
|
708
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
709
|
+
*
|
|
710
|
+
* PATTERN: Assignments submitted on Sundays and Mondays show deadline-rush
|
|
711
|
+
* behavior: 60% are late (vs ~20% baseline) and quiz scores drop by 15 points.
|
|
712
|
+
* These events carry is_deadline_rush: true.
|
|
713
|
+
*
|
|
714
|
+
* HOW TO FIND IT:
|
|
715
|
+
* - Chart: assignment_submitted by day of week
|
|
716
|
+
* - Compare: is_late rate by day of week
|
|
717
|
+
* - Compare: quiz_completed score_percent by day of week
|
|
718
|
+
* - Filter: is_deadline_rush = true
|
|
719
|
+
*
|
|
720
|
+
* EXPECTED INSIGHT: Clear quality drop on Sun/Mon. Late submission rate spikes
|
|
721
|
+
* from ~20% to ~60%. Quiz scores taken on crunch days average 15 points lower.
|
|
722
|
+
* This creates a visible "weekend dip" in student performance metrics.
|
|
723
|
+
*
|
|
724
|
+
* REAL-WORLD ANALOGUE: The "Sunday Scaries" of EdTech - students procrastinate
|
|
725
|
+
* and cram before Monday deadlines. Identical to real patterns seen in Coursera
|
|
726
|
+
* and university LMS data where submission quality drops near deadlines.
|
|
727
|
+
*
|
|
728
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
729
|
+
* 3. NOTES-TAKERS SUCCEED
|
|
730
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
731
|
+
*
|
|
732
|
+
* PATTERN: Students who take notes during 5 or more lecture_completed events
|
|
733
|
+
* receive a +20 boost to all quiz scores (capped at 100), and have a 40% chance
|
|
734
|
+
* of earning an extra certificate. Events are marked diligent_student: true.
|
|
735
|
+
*
|
|
736
|
+
* HOW TO FIND IT:
|
|
737
|
+
* - Create segment: users with 5+ lecture_completed where notes_taken = true
|
|
738
|
+
* - Compare: average quiz_completed score_percent
|
|
739
|
+
* - Compare: certificate_earned count per user
|
|
740
|
+
* - Filter: diligent_student = true
|
|
741
|
+
*
|
|
742
|
+
* EXPECTED INSIGHT: Diligent note-takers score ~20 points higher on quizzes
|
|
743
|
+
* and earn certificates at a significantly higher rate. This is a classic
|
|
744
|
+
* "active learning" signal visible in the data.
|
|
745
|
+
*
|
|
746
|
+
* REAL-WORLD ANALOGUE: Active engagement features (highlighting, bookmarking,
|
|
747
|
+
* note-taking) that correlate with better learning outcomes. Real research
|
|
748
|
+
* confirms note-taking improves retention by 30-40% - this hook models that.
|
|
749
|
+
*
|
|
750
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
751
|
+
* 4. STUDY GROUP RETENTION
|
|
752
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
753
|
+
*
|
|
754
|
+
* PATTERN: Students who join a study group within their first 10 days keep all
|
|
755
|
+
* their events and have a 60% chance of a bonus discussion event. Students
|
|
756
|
+
* who do NOT join early AND have quiz scores below 60 experience severe churn:
|
|
757
|
+
* 70% of their events after day 14 are removed.
|
|
758
|
+
*
|
|
759
|
+
* HOW TO FIND IT:
|
|
760
|
+
* - Create cohort: users who did "study group joined" within first 10 days
|
|
761
|
+
* - Compare: D14/D30 retention rate vs non-joiners
|
|
762
|
+
* - Compare: total events per user after day 14
|
|
763
|
+
* - Filter: study_group_member = true on bonus events
|
|
764
|
+
*
|
|
765
|
+
* EXPECTED INSIGHT: Early study group joiners show dramatically better retention
|
|
766
|
+
* curves. Non-joiners with low quiz scores show a cliff-like drop in activity
|
|
767
|
+
* after day 14. The combination of social isolation + poor performance predicts
|
|
768
|
+
* churn with high accuracy.
|
|
769
|
+
*
|
|
770
|
+
* REAL-WORLD ANALOGUE: Social learning features that create accountability and
|
|
771
|
+
* community. MOOCs with study groups or cohort-based programs consistently show
|
|
772
|
+
* 3-5x higher completion rates than pure self-paced learning.
|
|
773
|
+
*
|
|
774
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
775
|
+
* 5. HINT DEPENDENCY
|
|
776
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
777
|
+
*
|
|
778
|
+
* PATTERN: In practice_problem_solved events, students who use hints have a 60%
|
|
779
|
+
* chance of having their problem difficulty set to "easy". Students who solve
|
|
780
|
+
* without hints have a 40% chance of tackling "hard" problems and receive
|
|
781
|
+
* independent_solver: true.
|
|
782
|
+
*
|
|
783
|
+
* HOW TO FIND IT:
|
|
784
|
+
* - Segment practice_problem_solved by: hint_used = true vs false
|
|
785
|
+
* - Compare: difficulty distribution (easy vs medium vs hard)
|
|
786
|
+
* - Filter: independent_solver = true
|
|
787
|
+
* - Compare: average time_to_solve_sec by hint usage
|
|
788
|
+
*
|
|
789
|
+
* EXPECTED INSIGHT: Hint users cluster on easy problems; non-hint users tackle
|
|
790
|
+
* harder problems. This creates a visible "hint dependency" where the scaffolding
|
|
791
|
+
* intended to help students actually limits their growth trajectory.
|
|
792
|
+
*
|
|
793
|
+
* REAL-WORLD ANALOGUE: The "training wheels" problem in education technology.
|
|
794
|
+
* Hints, auto-complete, and guided solutions can create dependency rather than
|
|
795
|
+
* building genuine competence. Real platforms like LeetCode and HackerRank
|
|
796
|
+
* observe this pattern.
|
|
797
|
+
*
|
|
798
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
799
|
+
* 6. SEMESTER-END SPIKE
|
|
800
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
801
|
+
*
|
|
802
|
+
* PATTERN: During days 75-85 of the dataset, quiz_started, quiz_completed, and
|
|
803
|
+
* assignment_submitted events have a 50% chance of being duplicated (with slightly
|
|
804
|
+
* offset timestamps). All events in this window carry semester_end_rush: true.
|
|
805
|
+
*
|
|
806
|
+
* HOW TO FIND IT:
|
|
807
|
+
* - Chart: quiz_started, quiz_completed, assignment_submitted counts by day
|
|
808
|
+
* - Look for: clear volume spike during days 75-85
|
|
809
|
+
* - Filter: semester_end_rush = true
|
|
810
|
+
* - Compare: event volume in days 75-85 vs days 60-75 (baseline)
|
|
811
|
+
*
|
|
812
|
+
* EXPECTED INSIGHT: Assessment activity rises by roughly 50% during the "finals"
|
|
813
|
+
* period. This creates a visible spike in the time series that mirrors real
|
|
814
|
+
* academic calendar patterns.
|
|
815
|
+
*
|
|
816
|
+
* REAL-WORLD ANALOGUE: End-of-semester, end-of-quarter, or end-of-trial
|
|
817
|
+
* behavior spikes. Every EdTech platform sees massive activity surges before
|
|
818
|
+
* deadlines, certification exams, or subscription renewal dates.
|
|
819
|
+
*
|
|
820
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
821
|
+
* 7. FREE VS PAID COURSES
|
|
822
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
823
|
+
*
|
|
824
|
+
* PATTERN: The Course Completion funnel conversion rate is modified by the user's
|
|
825
|
+
* subscription_status. Free users convert at 0.6x the base rate; monthly and
|
|
826
|
+
* annual subscribers convert at 1.3x. This creates a ~2.2x difference between
|
|
827
|
+
* free and paid users in course completion.
|
|
828
|
+
*
|
|
829
|
+
* HOW TO FIND IT:
|
|
830
|
+
* - Segment the Course Completion funnel by: subscription_status
|
|
831
|
+
* - Compare: funnel conversion rates for free vs monthly vs annual
|
|
832
|
+
* - Compare: certificate_earned counts by subscription_status
|
|
833
|
+
*
|
|
834
|
+
* EXPECTED INSIGHT: Paid subscribers are roughly 2x more likely to complete
|
|
835
|
+
* courses end-to-end. Free users drop off heavily between quiz_completed and
|
|
836
|
+
* certificate_earned. This mirrors the "skin in the game" effect.
|
|
837
|
+
*
|
|
838
|
+
* REAL-WORLD ANALOGUE: The well-documented correlation between payment and
|
|
839
|
+
* completion in online education. Paid Coursera learners complete courses at
|
|
840
|
+
* 5-10x the rate of free audit-track learners. Financial commitment creates
|
|
841
|
+
* psychological commitment.
|
|
842
|
+
*
|
|
843
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
844
|
+
* 8. PLAYBACK SPEED CORRELATION
|
|
845
|
+
* ───────────────────────────────────────────────────────────────────────────────
|
|
846
|
+
*
|
|
847
|
+
* PATTERN: In lecture_completed events, playback speed creates two distinct
|
|
848
|
+
* learner segments:
|
|
849
|
+
* - Speed learners (>= 1.5x): get speed_learner: true, compressed watch_time
|
|
850
|
+
* (0.6x), and paradoxically HIGHER quiz scores (+8 points)
|
|
851
|
+
* - Thorough learners (<= 1.0x): get thorough_learner: true, extended watch_time
|
|
852
|
+
* (1.4x)
|
|
853
|
+
*
|
|
854
|
+
* HOW TO FIND IT:
|
|
855
|
+
* - Segment lecture_completed by: playback_speed
|
|
856
|
+
* - Compare: average watch_time_mins by speed bucket
|
|
857
|
+
* - Compare: subsequent quiz_completed score_percent
|
|
858
|
+
* - Filter: speed_learner = true or thorough_learner = true
|
|
859
|
+
* - Correlate: playback_speed with quiz performance
|
|
860
|
+
*
|
|
861
|
+
* EXPECTED INSIGHT: Counter-intuitively, speed learners score slightly higher
|
|
862
|
+
* on quizzes despite watching lectures faster. This suggests that playback speed
|
|
863
|
+
* is a proxy for prior knowledge or aptitude, not laziness.
|
|
864
|
+
*
|
|
865
|
+
* REAL-WORLD ANALOGUE: Research on lecture playback speed consistently shows
|
|
866
|
+
* that students who watch at 1.5-2x speed perform equally or better on assessments.
|
|
867
|
+
* Speed selection correlates with confidence and familiarity with the material,
|
|
868
|
+
* not with learning quality.
|
|
869
|
+
*
|
|
870
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
871
|
+
* ADVANCED ANALYSIS IDEAS
|
|
872
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
873
|
+
*
|
|
874
|
+
* CROSS-HOOK PATTERNS:
|
|
875
|
+
*
|
|
876
|
+
* 1. The Ideal Student: Users who:
|
|
877
|
+
* - Take notes consistently (Hook #3)
|
|
878
|
+
* - Join study groups early (Hook #4)
|
|
879
|
+
* - Solve problems without hints (Hook #5)
|
|
880
|
+
* - Have paid subscriptions (Hook #7)
|
|
881
|
+
* - Watch lectures at higher speed (Hook #8)
|
|
882
|
+
* These students should have exceptional completion rates and quiz scores.
|
|
883
|
+
*
|
|
884
|
+
* 2. The Cramming Cascade: Do deadline crammers (Hook #2) also show up
|
|
885
|
+
* in the semester-end spike (Hook #6)? Is the quality drop compounded?
|
|
886
|
+
*
|
|
887
|
+
* 3. Social Safety Net: Does early study group joining (Hook #4) prevent
|
|
888
|
+
* churn even for students who struggle on quizzes?
|
|
889
|
+
*
|
|
890
|
+
* 4. Hint-to-Mastery Pipeline: Do hint-dependent students (Hook #5) who
|
|
891
|
+
* later join study groups (Hook #4) eventually wean off hints?
|
|
892
|
+
*
|
|
893
|
+
* 5. Payment + Notes: Are paid subscribers (Hook #7) more likely to take
|
|
894
|
+
* notes (Hook #3)? Does the combination create a super-performer segment?
|
|
895
|
+
*
|
|
896
|
+
* COHORT ANALYSIS:
|
|
897
|
+
*
|
|
898
|
+
* - Cohort by education level: Do PhD students vs self-taught learners
|
|
899
|
+
* show different hook patterns?
|
|
900
|
+
* - Cohort by learning style: Do visual vs hands-on learners take more notes?
|
|
901
|
+
* - Cohort by platform: Do mobile (iOS/Android) users have different playback
|
|
902
|
+
* speed preferences than Web/iPad users?
|
|
903
|
+
* - Cohort by course category: Do CS students use hints more than Arts students?
|
|
904
|
+
*
|
|
905
|
+
* FUNNEL ANALYSIS:
|
|
906
|
+
*
|
|
907
|
+
* - Onboarding Funnel: How does account_type affect the register -> enroll ->
|
|
908
|
+
* first lecture conversion?
|
|
909
|
+
* - Course Completion Funnel: Compare by subscription_status, note-taking
|
|
910
|
+
* behavior, and study group membership
|
|
911
|
+
* - Practice Mastery Funnel: Compare by hint usage, playback speed, and
|
|
912
|
+
* learning style
|
|
913
|
+
*
|
|
914
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
915
|
+
* EXPECTED METRICS SUMMARY
|
|
916
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
917
|
+
*
|
|
918
|
+
* Hook | Metric | Baseline | Hook Effect | Ratio
|
|
919
|
+
* ────────────────────────|───────────────────────|──────────|──────────────|──────
|
|
920
|
+
* Student vs Instructor | Profile attributes | generic | role-specific| N/A
|
|
921
|
+
* Deadline Cramming | Late submission rate | ~20% | ~60% | 3x
|
|
922
|
+
* Deadline Cramming | Quiz score (Sun/Mon) | ~65 | ~50 | -15pt
|
|
923
|
+
* Notes-Takers Succeed | Quiz score | ~65 | ~85 | +20pt
|
|
924
|
+
* Notes-Takers Succeed | Certificate rate | baseline | +40% | 1.4x
|
|
925
|
+
* Study Group Retention | D14 retention | ~40% | ~90% | 2.3x
|
|
926
|
+
* Study Group Retention | Post-D14 events | 100% | 30% (churn) | 0.3x
|
|
927
|
+
* Hint Dependency | Easy problem rate | ~33% | ~60% | 1.8x
|
|
928
|
+
* Hint Dependency | Hard problem rate | ~33% | ~40% (no hint)| 1.2x
|
|
929
|
+
* Semester-End Spike | Assessment volume | baseline | ~1.5x | 1.5x
|
|
930
|
+
* Free vs Paid | Course completion | 15% | 33% | 2.2x
|
|
931
|
+
* Playback Speed | Quiz score (speed) | ~65 | ~73 | +8pt
|
|
932
|
+
*
|
|
933
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
934
|
+
* HOW TO RUN THIS DUNGEON
|
|
935
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
936
|
+
*
|
|
937
|
+
* From the dm4 root directory:
|
|
938
|
+
*
|
|
939
|
+
* npm start
|
|
940
|
+
*
|
|
941
|
+
* Or programmatically:
|
|
942
|
+
*
|
|
943
|
+
* import generate from './index.js';
|
|
944
|
+
* import config from './dungeons/harness/harness-education.js';
|
|
945
|
+
* const results = await generate(config);
|
|
946
|
+
*
|
|
947
|
+
* OUTPUT FILES (with writeToDisk: true, format: "json", gzip: true):
|
|
948
|
+
*
|
|
949
|
+
* - needle-haystack-education__events.json.gz - All event data
|
|
950
|
+
* - needle-haystack-education__user_profiles.json.gz - User profiles
|
|
951
|
+
* - needle-haystack-education__group_profiles.json.gz - Course & study group profiles
|
|
952
|
+
* - needle-haystack-education__course_id_lookup.json.gz - Course catalog
|
|
953
|
+
* - needle-haystack-education__quiz_id_lookup.json.gz - Quiz catalog
|
|
954
|
+
*
|
|
955
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
956
|
+
* TESTING YOUR ANALYTICS PLATFORM
|
|
957
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
958
|
+
*
|
|
959
|
+
* This dungeon is perfect for testing:
|
|
960
|
+
*
|
|
961
|
+
* 1. Segmentation: Can you separate instructor vs student behavior patterns?
|
|
962
|
+
* 2. Temporal Analysis: Can you detect the deadline cramming and semester-end spike?
|
|
963
|
+
* 3. Behavioral Correlation: Can you discover the note-taking success pattern?
|
|
964
|
+
* 4. Retention Analysis: Can you identify the study group retention effect?
|
|
965
|
+
* 5. Feature Impact: Can you measure hint dependency on problem difficulty?
|
|
966
|
+
* 6. Anomaly Detection: Can you automatically detect the semester-end volume spike?
|
|
967
|
+
* 7. Funnel Analysis: Can you quantify the free vs paid completion gap?
|
|
968
|
+
* 8. Counter-intuitive Insight: Can you find the speed learner paradox?
|
|
969
|
+
*
|
|
970
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
971
|
+
* WHY "NEEDLE IN A HAYSTACK"?
|
|
972
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
973
|
+
*
|
|
974
|
+
* Each hook is a "needle" - a meaningful, actionable insight hidden in a
|
|
975
|
+
* "haystack" of 360K events. The challenge is:
|
|
976
|
+
*
|
|
977
|
+
* 1. FINDING the needles (discovery)
|
|
978
|
+
* 2. VALIDATING they are real patterns (statistical significance)
|
|
979
|
+
* 3. UNDERSTANDING why they matter (educational impact)
|
|
980
|
+
* 4. ACTING on them (platform improvements)
|
|
981
|
+
*
|
|
982
|
+
* This mirrors real-world EdTech analytics: your data contains valuable insights
|
|
983
|
+
* about student success, but you need the right tools and skills to find them.
|
|
984
|
+
*
|
|
985
|
+
* Happy Learning!
|
|
986
|
+
*
|
|
987
|
+
* ═══════════════════════════════════════════════════════════════════════════════
|
|
988
|
+
*/
|