make-mp-data 2.1.11 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/README.md +31 -0
  2. package/dungeons/adspend.js +35 -1
  3. package/dungeons/anon.js +25 -1
  4. package/dungeons/array-of-object-lookup.js +201 -0
  5. package/dungeons/benchmark-heavy.js +241 -0
  6. package/dungeons/benchmark-light.js +141 -0
  7. package/dungeons/big.js +10 -9
  8. package/dungeons/business.js +60 -12
  9. package/dungeons/complex.js +35 -1
  10. package/dungeons/copilot.js +383 -0
  11. package/dungeons/education.js +1005 -0
  12. package/dungeons/experiments.js +18 -4
  13. package/dungeons/fintech.js +976 -0
  14. package/dungeons/foobar.js +32 -0
  15. package/dungeons/food.js +988 -0
  16. package/dungeons/funnels.js +38 -1
  17. package/dungeons/gaming.js +26 -5
  18. package/dungeons/media.js +861 -270
  19. package/dungeons/mil.js +31 -3
  20. package/dungeons/mirror.js +33 -1
  21. package/dungeons/retention-cadence.js +211 -0
  22. package/dungeons/rpg.js +1178 -0
  23. package/dungeons/sanity.js +32 -2
  24. package/dungeons/sass.js +923 -0
  25. package/dungeons/scd.js +47 -1
  26. package/dungeons/simple.js +29 -14
  27. package/dungeons/social.js +928 -0
  28. package/dungeons/streaming.js +373 -0
  29. package/dungeons/strict-event-test.js +30 -0
  30. package/dungeons/student-teacher.js +19 -5
  31. package/dungeons/text-generation.js +120 -84
  32. package/dungeons/too-big-events.js +203 -0
  33. package/dungeons/{userAgent.js → user-agent.js} +23 -2
  34. package/entry.js +5 -4
  35. package/index.js +41 -54
  36. package/lib/core/config-validator.js +122 -7
  37. package/lib/core/context.js +7 -14
  38. package/lib/core/storage.js +57 -25
  39. package/lib/generators/adspend.js +12 -12
  40. package/lib/generators/events.js +6 -5
  41. package/lib/generators/funnels.js +32 -10
  42. package/lib/generators/product-lookup.js +262 -0
  43. package/lib/generators/product-names.js +195 -0
  44. package/lib/generators/profiles.js +3 -3
  45. package/lib/generators/scd.js +13 -3
  46. package/lib/generators/text.js +17 -4
  47. package/lib/orchestrators/mixpanel-sender.js +244 -204
  48. package/lib/orchestrators/user-loop.js +54 -16
  49. package/lib/templates/phrases.js +473 -16
  50. package/lib/templates/schema.d.ts +173 -0
  51. package/lib/templates/verbose-schema.js +140 -206
  52. package/lib/utils/chart.js +210 -0
  53. package/lib/utils/function-registry.js +285 -0
  54. package/lib/utils/json-evaluator.js +172 -0
  55. package/lib/utils/logger.js +34 -0
  56. package/lib/utils/utils.js +41 -4
  57. package/package.json +12 -21
  58. package/types.d.ts +15 -5
  59. package/dungeons/ai-chat-analytics-ed.js +0 -274
  60. package/dungeons/money2020-ed-also.js +0 -277
  61. package/dungeons/money2020-ed.js +0 -579
  62. package/lib/generators/text-bak-old.js +0 -1121
  63. package/lib/orchestrators/worker-manager.js +0 -203
  64. package/lib/templates/hooks-instructions.txt +0 -434
  65. package/lib/templates/phrases-bak.js +0 -925
  66. package/lib/templates/prompt (old).txt +0 -98
  67. package/lib/templates/schema-instructions.txt +0 -155
  68. package/lib/templates/scratch-dungeon-template.js +0 -116
  69. package/lib/templates/textQuickTest.js +0 -172
  70. package/lib/utils/ai.js +0 -120
  71. package/lib/utils/project.js +0 -166
@@ -0,0 +1,923 @@
1
+ import dayjs from "dayjs";
2
+ import utc from "dayjs/plugin/utc.js";
3
+ import "dotenv/config";
4
+ import * as u from "../lib/utils/utils.js";
5
+ import * as v from "ak-tools";
6
+
7
+ const SEED = "harness-sass";
8
+ dayjs.extend(utc);
9
+ const chance = u.initChance(SEED);
10
+ const num_users = 5_000;
11
+ const days = 100;
12
+
13
+ /** @typedef {import("../types.js").Dungeon} Config */
14
+
15
+ /**
16
+ * CLOUDFORGE - B2B Cloud Infrastructure Monitoring & Deployment Platform
17
+ *
18
+ * CloudForge is a B2B SaaS platform that combines infrastructure monitoring (like Datadog)
19
+ * with deployment automation (like Terraform). It serves engineering teams across companies
20
+ * of all sizes - from startups deploying their first microservice to enterprises managing
21
+ * thousands of services across multi-cloud environments.
22
+ *
23
+ * CORE PLATFORM:
24
+ * Teams create workspaces, deploy services across AWS/GCP/Azure, and monitor everything
25
+ * from a unified dashboard. The platform tracks uptime, latency, error rates, CPU/memory
26
+ * usage, and costs. When things go wrong, CloudForge triggers alerts that route through
27
+ * PagerDuty/Slack integrations, and on-call engineers acknowledge and resolve incidents
28
+ * using automated runbooks.
29
+ *
30
+ * DEPLOYMENT PIPELINE:
31
+ * CloudForge manages CI/CD pipelines that deploy services to production, staging, and dev
32
+ * environments. Pipelines track commit counts, duration, and success/failure rates. When
33
+ * deployments fail, recovery deploys take longer as engineers carefully roll forward.
34
+ * Infrastructure can scale automatically or manually based on load.
35
+ *
36
+ * INCIDENT MANAGEMENT:
37
+ * Alerts flow through a severity system (info -> warning -> critical -> emergency).
38
+ * Critical and emergency alerts sometimes escalate into formal incidents with P1/P2
39
+ * classification, paging multiple teams. Teams with Slack + PagerDuty integrations
40
+ * respond and resolve incidents significantly faster than those without.
41
+ *
42
+ * COST MANAGEMENT:
43
+ * The platform generates cost reports showing daily/weekly/monthly spend. When costs
44
+ * spike beyond budgets, teams react by scaling down infrastructure. End-of-quarter
45
+ * pushes drive plan upgrades and team expansion as companies rush to hit targets.
46
+ *
47
+ * SECURITY & COMPLIANCE:
48
+ * Regular security scans check for vulnerabilities, compliance violations, and access
49
+ * audit issues. Feature flags control rollout of new capabilities across environments.
50
+ *
51
+ * PRICING MODEL:
52
+ * Four tiers: Free (limited), Team (small teams), Business (mid-market), Enterprise
53
+ * (large organizations). Pricing based on seat count and resource usage. Enterprise
54
+ * customers get dedicated customer success managers and annual contracts.
55
+ *
56
+ * TARGET USERS:
57
+ * Engineers, SREs, DevOps professionals, engineering managers, and executives who
58
+ * need visibility into their cloud infrastructure and deployment processes.
59
+ *
60
+ * WHY THESE EVENTS/PROPERTIES?
61
+ * - Events model a complete B2B SaaS lifecycle: onboarding -> adoption -> expansion -> renewal
62
+ * - Properties enable cohort analysis: company size, plan tier, role, cloud provider
63
+ * - Funnels reveal friction: onboarding completion, incident resolution, deployment success
64
+ * - Hooks simulate real operational insights hidden in production telemetry data
65
+ * - The "needle in haystack" hooks create discoverable patterns that mirror real B2B analytics
66
+ */
67
+
68
+ // Generate consistent IDs for lookup tables and event properties
69
+ const serviceIds = v.range(1, 201).map(() => `svc_${v.uid(8)}`);
70
+ const alertIds = v.range(1, 501).map(() => `alert_${v.uid(6)}`);
71
+ const pipelineIds = v.range(1, 101).map(() => `pipe_${v.uid(6)}`);
72
+ const runbookIds = v.range(1, 51).map(() => `rb_${v.uid(6)}`);
73
+ const companyIds = v.range(1, 301).map(() => `comp_${v.uid(8)}`);
74
+
75
+ // Module-level Maps for closure-based state tracking across hook calls
76
+ const costOverrunUsers = new Map();
77
+ const failedDeployUsers = new Map();
78
+
79
+ /** @type {Config} */
80
+ const config = {
81
+ token: "",
82
+ seed: SEED,
83
+ numDays: days,
84
+ numEvents: num_users * 120,
85
+ numUsers: num_users,
86
+ hasAnonIds: false,
87
+ hasSessionIds: true,
88
+ format: "json",
89
+ gzip: true,
90
+ alsoInferFunnels: false,
91
+ hasLocation: true,
92
+ hasAndroidDevices: false,
93
+ hasIOSDevices: false,
94
+ hasDesktopDevices: true,
95
+ hasBrowser: true,
96
+ hasCampaigns: false,
97
+ isAnonymous: false,
98
+ hasAdSpend: false,
99
+ percentUsersBornInDataset: 50,
100
+ hasAvatar: true,
101
+ makeChart: false,
102
+ batchSize: 2_500_000,
103
+ concurrency: 10,
104
+ writeToDisk: false,
105
+ scdProps: {},
106
+
107
+ funnels: [
108
+ {
109
+ sequence: ["workspace created", "service deployed", "dashboard viewed"],
110
+ isFirstFunnel: true,
111
+ conversionRate: 70,
112
+ timeToConvert: 2,
113
+ },
114
+ {
115
+ // Daily monitoring: dashboards, queries, API calls (most common)
116
+ sequence: ["dashboard viewed", "query executed", "api call"],
117
+ conversionRate: 80,
118
+ timeToConvert: 0.5,
119
+ weight: 5,
120
+ },
121
+ {
122
+ // Incident response pipeline
123
+ sequence: ["alert triggered", "alert acknowledged", "alert resolved"],
124
+ conversionRate: 55,
125
+ timeToConvert: 6,
126
+ weight: 4,
127
+ },
128
+ {
129
+ // Deployment cycle
130
+ sequence: ["deployment pipeline run", "service deployed", "dashboard viewed"],
131
+ conversionRate: 65,
132
+ timeToConvert: 1,
133
+ weight: 3,
134
+ },
135
+ {
136
+ // Infrastructure management
137
+ sequence: ["cost report generated", "infrastructure scaled", "security scan"],
138
+ conversionRate: 50,
139
+ timeToConvert: 4,
140
+ weight: 2,
141
+ },
142
+ {
143
+ // Team and config management
144
+ sequence: ["team member invited", "integration configured", "feature flag toggled"],
145
+ conversionRate: 40,
146
+ timeToConvert: 8,
147
+ weight: 2,
148
+ },
149
+ {
150
+ // Documentation and runbook usage
151
+ sequence: ["documentation viewed", "runbook executed", "service deployed"],
152
+ conversionRate: 45,
153
+ timeToConvert: 2,
154
+ weight: 2,
155
+ },
156
+ {
157
+ // Billing and account management
158
+ sequence: ["billing event", "dashboard viewed"],
159
+ conversionRate: 60,
160
+ timeToConvert: 1,
161
+ weight: 1,
162
+ },
163
+ ],
164
+
165
+ events: [
166
+ {
167
+ event: "workspace created",
168
+ weight: 1,
169
+ isFirstEvent: true,
170
+ properties: {
171
+ company_size: u.pickAWinner(["startup", "smb", "mid_market", "enterprise"]),
172
+ industry: u.pickAWinner(["tech", "finance", "healthcare", "retail", "media"]),
173
+ }
174
+ },
175
+ {
176
+ event: "service deployed",
177
+ weight: 10,
178
+ properties: {
179
+ service_id: u.pickAWinner(serviceIds),
180
+ service_type: u.pickAWinner(["web_app", "api", "database", "cache", "queue", "ml_model"]),
181
+ environment: u.pickAWinner(["production", "staging", "dev"]),
182
+ cloud_provider: u.pickAWinner(["aws", "gcp", "azure"]),
183
+ }
184
+ },
185
+ {
186
+ event: "dashboard viewed",
187
+ weight: 20,
188
+ properties: {
189
+ dashboard_type: u.pickAWinner(["overview", "cost", "performance", "security", "custom"]),
190
+ time_range: u.pickAWinner(["1h", "6h", "24h", "7d", "30d"]),
191
+ }
192
+ },
193
+ {
194
+ event: "alert triggered",
195
+ weight: 12,
196
+ properties: {
197
+ alert_id: u.pickAWinner(alertIds),
198
+ severity: u.pickAWinner(["info", "warning", "critical", "emergency"]),
199
+ alert_type: u.pickAWinner(["cpu", "memory", "latency", "error_rate", "disk", "network"]),
200
+ service_id: u.pickAWinner(serviceIds),
201
+ }
202
+ },
203
+ {
204
+ event: "alert acknowledged",
205
+ weight: 8,
206
+ properties: {
207
+ alert_id: u.pickAWinner(alertIds),
208
+ response_time_mins: u.weighNumRange(1, 120),
209
+ acknowledged_by_role: u.pickAWinner(["engineer", "sre", "manager", "oncall"]),
210
+ }
211
+ },
212
+ {
213
+ event: "alert resolved",
214
+ weight: 7,
215
+ properties: {
216
+ alert_id: u.pickAWinner(alertIds),
217
+ resolution_time_mins: u.weighNumRange(5, 1440),
218
+ root_cause: u.pickAWinner(["config_change", "capacity", "bug", "dependency", "network"]),
219
+ }
220
+ },
221
+ {
222
+ event: "deployment pipeline run",
223
+ weight: 9,
224
+ properties: {
225
+ pipeline_id: u.pickAWinner(pipelineIds),
226
+ status: u.pickAWinner(["success", "failed", "cancelled"]),
227
+ duration_sec: u.weighNumRange(30, 1800),
228
+ commit_count: u.weighNumRange(1, 20),
229
+ }
230
+ },
231
+ {
232
+ event: "infrastructure scaled",
233
+ weight: 5,
234
+ properties: {
235
+ service_id: u.pickAWinner(serviceIds),
236
+ scale_direction: u.pickAWinner(["up", "down"], 0.15),
237
+ previous_capacity: u.weighNumRange(1, 100),
238
+ new_capacity: u.weighNumRange(1, 100),
239
+ auto_scaled: u.pickAWinner([true, false], 0.15),
240
+ }
241
+ },
242
+ {
243
+ event: "cost report generated",
244
+ weight: 4,
245
+ properties: {
246
+ report_period: u.pickAWinner(["daily", "weekly", "monthly"]),
247
+ total_cost: u.weighNumRange(100, 50000),
248
+ cost_change_percent: u.weighNumRange(-30, 50),
249
+ }
250
+ },
251
+ {
252
+ event: "team member invited",
253
+ weight: 3,
254
+ properties: {
255
+ role: u.pickAWinner(["admin", "editor", "viewer", "billing"]),
256
+ invitation_method: u.pickAWinner(["email", "sso", "slack"]),
257
+ }
258
+ },
259
+ {
260
+ event: "integration configured",
261
+ weight: 4,
262
+ properties: {
263
+ integration_type: u.pickAWinner(["slack", "pagerduty", "jira", "github", "datadog", "terraform"]),
264
+ status: u.pickAWinner(["active", "paused", "error"]),
265
+ }
266
+ },
267
+ {
268
+ event: "query executed",
269
+ weight: 15,
270
+ properties: {
271
+ query_type: u.pickAWinner(["metrics", "logs", "traces"]),
272
+ time_range_hours: u.weighNumRange(1, 720),
273
+ result_count: u.weighNumRange(0, 10000),
274
+ }
275
+ },
276
+ {
277
+ event: "runbook executed",
278
+ weight: 3,
279
+ properties: {
280
+ runbook_id: u.pickAWinner(runbookIds),
281
+ trigger: u.pickAWinner(["manual", "automated", "alert_triggered"]),
282
+ success: u.pickAWinner([true, false], 0.15),
283
+ }
284
+ },
285
+ {
286
+ event: "billing event",
287
+ weight: 3,
288
+ properties: {
289
+ event_type: u.pickAWinner(["invoice_generated", "payment_received", "payment_failed", "plan_upgraded", "plan_downgraded"]),
290
+ amount: u.weighNumRange(99, 25000),
291
+ }
292
+ },
293
+ {
294
+ event: "security scan",
295
+ weight: 6,
296
+ properties: {
297
+ scan_type: u.pickAWinner(["vulnerability", "compliance", "access_audit"]),
298
+ findings_count: u.weighNumRange(0, 50),
299
+ critical_findings: u.weighNumRange(0, 10),
300
+ }
301
+ },
302
+ {
303
+ event: "api call",
304
+ weight: 16,
305
+ properties: {
306
+ endpoint: u.pickAWinner(["/deploy", "/status", "/metrics", "/alerts", "/config", "/billing"]),
307
+ method: u.pickAWinner(["GET", "POST", "PUT", "DELETE"]),
308
+ response_time_ms: u.weighNumRange(10, 5000),
309
+ status_code: u.pickAWinner([200, 201, 400, 401, 403, 500, 503]),
310
+ }
311
+ },
312
+ {
313
+ event: "documentation viewed",
314
+ weight: 7,
315
+ properties: {
316
+ doc_section: u.pickAWinner(["getting_started", "api_reference", "best_practices", "troubleshooting", "changelog"]),
317
+ time_on_page_sec: u.weighNumRange(5, 600),
318
+ }
319
+ },
320
+ {
321
+ event: "feature flag toggled",
322
+ weight: 4,
323
+ properties: {
324
+ flag_name: () => `flag_${chance.word()}`,
325
+ new_state: u.pickAWinner(["enabled", "disabled"], 0.15),
326
+ environment: u.pickAWinner(["production", "staging", "dev"]),
327
+ }
328
+ },
329
+ ],
330
+
331
+ superProps: {
332
+ plan_tier: u.pickAWinner(["free", "free", "team", "team", "business", "enterprise"]),
333
+ cloud_provider: u.pickAWinner(["aws", "gcp", "azure", "multi_cloud"]),
334
+ },
335
+
336
+ userProps: {
337
+ company_size: u.pickAWinner(["startup", "startup", "smb", "mid_market", "enterprise"]),
338
+ primary_role: u.pickAWinner(["engineer", "sre", "devops", "manager", "executive"]),
339
+ team_name: u.pickAWinner(["Platform", "Backend", "Frontend", "Data", "Security", "Infrastructure"]),
340
+ },
341
+
342
+ groupKeys: [
343
+ ["company_id", 300, ["workspace created", "service deployed", "billing event", "team member invited"]],
344
+ ],
345
+
346
+ groupProps: {
347
+ company_id: {
348
+ name: () => `${chance.word({ capitalize: true })} ${chance.pickone(["Systems", "Technologies", "Labs", "Cloud", "Digital", "Networks", "Solutions"])}`,
349
+ industry: u.pickAWinner(["tech", "finance", "healthcare", "retail", "media", "manufacturing", "logistics"]),
350
+ employee_count: u.pickAWinner(["1-10", "11-50", "51-200", "201-1000", "1001-5000", "5000+"]),
351
+ arr_bucket: u.pickAWinner(["<10k", "10k-50k", "50k-200k", "200k-1M", "1M+"]),
352
+ }
353
+ },
354
+
355
+ lookupTables: [],
356
+
357
+ /**
358
+ * ARCHITECTED ANALYTICS HOOKS
359
+ *
360
+ * This hook function creates 8 deliberate patterns in the data:
361
+ *
362
+ * 1. END-OF-QUARTER SPIKE: Days 80-90 drive plan upgrades and team expansion
363
+ * 2. CHURNED ACCOUNT SILENCING: ~10% of users go completely silent after month 1
364
+ * 3. ALERT ESCALATION REPLACEMENT: Critical alerts become "incident created" events
365
+ * 4. INTEGRATION USERS SUCCEED: Slack+PagerDuty users resolve incidents 50-60% faster
366
+ * 5. DOCS READERS DEPLOY MORE: Best practices readers get extra production deploys
367
+ * 6. COST OVERRUN PATTERN: Budget-exceeded users react by scaling down infrastructure
368
+ * 7. FAILED DEPLOYMENT RECOVERY: Recovery deploys take 1.5x longer, tracked across calls
369
+ * 8. ENTERPRISE VS STARTUP: Company size determines seat count, ACV, and health score
370
+ */
371
+ hook: function (record, type, meta) {
372
+ const NOW = dayjs();
373
+ const DATASET_START = NOW.subtract(days, "days");
374
+
375
+ // ─────────────────────────────────────────────────────────────
376
+ // Hook #1: END-OF-QUARTER SPIKE (event)
377
+ // Days 80-90: billing upgrades and team expansion surge
378
+ // ─────────────────────────────────────────────────────────────
379
+ if (type === "event") {
380
+ const EVENT_TIME = dayjs(record.time);
381
+ const dayInDataset = EVENT_TIME.diff(DATASET_START, "days", true);
382
+
383
+ if (record.event === "billing event") {
384
+ if (dayInDataset >= 80 && dayInDataset <= 90 && chance.bool({ likelihood: 40 })) {
385
+ record.event_type = "plan_upgraded";
386
+ record.quarter_end_push = true;
387
+ } else {
388
+ record.quarter_end_push = false;
389
+ }
390
+ }
391
+
392
+ if (record.event === "team member invited") {
393
+ if (dayInDataset >= 80 && dayInDataset <= 90) {
394
+ record.quarter_end_push = true;
395
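+ // 50% of the time emit a synthetic follow-up invite 1-60 min later (hiring push). NOTE(review): returning a new object here appears to replace the original event (same mechanism as Hook #3) rather than add a second one - confirm intent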
396
+ if (chance.bool({ likelihood: 50 })) {
397
+ return {
398
+ event: "team member invited",
399
+ time: EVENT_TIME.add(chance.integer({ min: 1, max: 60 }), "minutes").toISOString(),
400
+ user_id: record.user_id,
401
+ role: chance.pickone(["editor", "viewer"]),
402
+ invitation_method: chance.pickone(["email", "sso", "slack"]),
403
+ quarter_end_push: true,
404
+ duplicate_invite: true,
405
+ };
406
+ }
407
+ } else {
408
+ record.quarter_end_push = false;
409
+ }
410
+ }
411
+ }
412
+
413
+ // ─────────────────────────────────────────────────────────────
414
+ // Hook #3: ALERT ESCALATION REPLACEMENT (event)
415
+ // Critical/emergency alerts sometimes become formal incidents
416
+ // ─────────────────────────────────────────────────────────────
417
+ if (type === "event") {
418
+ if (record.event === "alert triggered") {
419
+ const severity = record.severity;
420
+ if ((severity === "critical" || severity === "emergency") && chance.bool({ likelihood: 30 })) {
421
+ // REPLACE the event entirely with an "incident created" event
422
+ return {
423
+ event: "incident created",
424
+ time: record.time,
425
+ user_id: record.user_id,
426
+ escalation_level: chance.pickone(["P1", "P2"]),
427
+ teams_paged: chance.integer({ min: 1, max: 5 }),
428
+ incident_id: `inc_${v.uid(8)}`,
429
+ original_severity: severity,
430
+ original_alert_type: record.alert_type,
431
+ service_id: record.service_id,
432
+ auto_escalated: true,
433
+ };
434
+ }
435
+ }
436
+ }
437
+
438
+ // ─────────────────────────────────────────────────────────────
439
+ // Hook #6: COST OVERRUN PATTERN (event)
440
+ // Budget-exceeded users react by scaling down infrastructure
441
+ // Uses module-level costOverrunUsers Map for cross-call state
442
+ // ─────────────────────────────────────────────────────────────
443
+ if (type === "event") {
444
+ if (record.event === "cost report generated") {
445
+ const costChange = record.cost_change_percent;
446
+ if (costChange > 25) {
447
+ record.cost_alert = true;
448
+ record.budget_exceeded = true;
449
+ costOverrunUsers.set(record.user_id, true);
450
+ } else {
451
+ record.cost_alert = false;
452
+ record.budget_exceeded = false;
453
+ }
454
+ }
455
+
456
+ if (record.event === "infrastructure scaled") {
457
+ if (costOverrunUsers.has(record.user_id)) {
458
+ record.scale_direction = "down";
459
+ record.cost_reaction = true;
460
+ costOverrunUsers.delete(record.user_id);
461
+ } else {
462
+ record.cost_reaction = false;
463
+ }
464
+ }
465
+ }
466
+
467
+ // ─────────────────────────────────────────────────────────────
468
+ // Hook #7: FAILED DEPLOYMENT RECOVERY (event)
469
+ // Recovery deploys take 1.5x longer after a failure
470
+ // Uses module-level failedDeployUsers Map for cross-call state
471
+ // ─────────────────────────────────────────────────────────────
472
+ if (type === "event") {
473
+ if (record.event === "deployment pipeline run") {
474
+ const status = record.status;
475
+ if (status === "failed") {
476
+ failedDeployUsers.set(record.user_id, true);
477
+ record.recovery_deployment = false;
478
+ } else if (status === "success" && failedDeployUsers.has(record.user_id)) {
479
+ record.duration_sec = Math.floor((record.duration_sec || 300) * 1.5);
480
+ record.recovery_deployment = true;
481
+ failedDeployUsers.delete(record.user_id);
482
+ } else {
483
+ record.recovery_deployment = false;
484
+ }
485
+ }
486
+ }
487
+
488
+ // ─────────────────────────────────────────────────────────────
489
+ // Hook #2: CHURNED ACCOUNT SILENCING (everything)
490
+ // ~10% of users go completely silent after day 30
491
+ // ─────────────────────────────────────────────────────────────
492
+ if (type === "everything") {
493
+ const userEvents = record;
494
+ if (userEvents && userEvents.length > 0) {
495
+ const firstEvent = userEvents[0];
496
+ const idHash = String(firstEvent.user_id || firstEvent.device_id).split("").reduce((acc, char) => acc + char.charCodeAt(0), 0);
497
+ const isChurnedAccount = (idHash % 10) === 0;
498
+
499
+ if (isChurnedAccount) {
500
+ for (let i = userEvents.length - 1; i >= 0; i--) {
501
+ const evt = userEvents[i];
502
+ const dayInDataset = dayjs(evt.time).diff(DATASET_START, "days", true);
503
+ if (dayInDataset > 30) {
504
+ userEvents.splice(i, 1);
505
+ }
506
+ }
507
+ }
508
+ }
509
+ }
510
+
511
+ // ─────────────────────────────────────────────────────────────
512
+ // Hook #4: INTEGRATION USERS SUCCEED (everything)
513
+ // Users with both Slack AND PagerDuty integrations resolve faster
514
+ // ─────────────────────────────────────────────────────────────
515
+ if (type === "everything") {
516
+ const userEvents = record;
517
+
518
+ // First pass: check if user has both slack and pagerduty integrations
519
+ let hasSlack = false;
520
+ let hasPagerduty = false;
521
+
522
+ userEvents.forEach((event) => {
523
+ if (event.event === "integration configured") {
524
+ const integrationType = event.integration_type;
525
+ if (integrationType === "slack") hasSlack = true;
526
+ if (integrationType === "pagerduty") hasPagerduty = true;
527
+ }
528
+ });
529
+
530
+ const hasFullIntegration = hasSlack && hasPagerduty;
531
+
532
+ // Second pass: set integrated_team on all alert events, then boost for integrated users
533
+ userEvents.forEach((event) => {
534
+ if (event.event === "alert acknowledged") {
535
+ if (hasFullIntegration && event.response_time_mins) {
536
+ event.response_time_mins = Math.floor(event.response_time_mins * 0.4);
537
+ event.integrated_team = true;
538
+ } else {
539
+ event.integrated_team = false;
540
+ }
541
+ }
542
+ if (event.event === "alert resolved") {
543
+ if (hasFullIntegration && event.resolution_time_mins) {
544
+ event.resolution_time_mins = Math.floor(event.resolution_time_mins * 0.5);
545
+ event.integrated_team = true;
546
+ } else {
547
+ event.integrated_team = false;
548
+ }
549
+ }
550
+ });
551
+ }
552
+
553
+ // ─────────────────────────────────────────────────────────────
554
+ // Hook #5: DOCS READERS DEPLOY MORE (everything)
555
+ // Users who read best_practices 3+ times get extra production deploys
556
+ // ─────────────────────────────────────────────────────────────
557
+ if (type === "everything") {
558
+ const userEvents = record;
559
+
560
+ // First pass: count best_practices documentation views
561
+ let bestPracticesCount = 0;
562
+ userEvents.forEach((event) => {
563
+ if (event.event === "documentation viewed" && event.doc_section === "best_practices") {
564
+ bestPracticesCount++;
565
+ }
566
+ });
567
+
568
+ // Second pass: if 3+ best practices views, add extra production deploys
569
+ if (bestPracticesCount >= 3) {
570
+ const extraDeploys = chance.integer({ min: 2, max: 3 });
571
+ const lastEvent = userEvents[userEvents.length - 1];
572
+ if (lastEvent) {
573
+ for (let i = 0; i < extraDeploys; i++) {
574
+ const deployEvent = {
575
+ event: "service deployed",
576
+ time: dayjs(lastEvent.time).add(chance.integer({ min: 1, max: 48 }), "hours").toISOString(),
577
+ user_id: lastEvent.user_id,
578
+ service_id: chance.pickone(serviceIds),
579
+ service_type: chance.pickone(["web_app", "api", "database", "cache", "queue", "ml_model"]),
580
+ environment: "production",
581
+ cloud_provider: chance.pickone(["aws", "gcp", "azure"]),
582
+ docs_informed: true,
583
+ };
584
+ userEvents.splice(userEvents.length, 0, deployEvent);
585
+ }
586
+ }
587
+ }
588
+ }
589
+
590
+ // ─────────────────────────────────────────────────────────────
591
+ // Hook #8: ENTERPRISE VS STARTUP (user)
592
+ // Company size determines seat count, ACV, and health score
593
+ // ─────────────────────────────────────────────────────────────
594
+ if (type === "user") {
595
+ const companySize = record.company_size;
596
+
597
+ if (companySize === "enterprise") {
598
+ record.seat_count = chance.integer({ min: 50, max: 500 });
599
+ record.annual_contract_value = chance.integer({ min: 50000, max: 500000 });
600
+ record.customer_success_manager = true;
601
+ } else if (companySize === "mid_market") {
602
+ record.seat_count = chance.integer({ min: 10, max: 50 });
603
+ record.annual_contract_value = chance.integer({ min: 12000, max: 50000 });
604
+ record.customer_success_manager = false;
605
+ } else if (companySize === "smb") {
606
+ record.seat_count = chance.integer({ min: 3, max: 10 });
607
+ record.annual_contract_value = chance.integer({ min: 3600, max: 12000 });
608
+ record.customer_success_manager = false;
609
+ } else if (companySize === "startup") {
610
+ record.seat_count = chance.integer({ min: 1, max: 5 });
611
+ record.annual_contract_value = chance.integer({ min: 0, max: 3600 });
612
+ record.customer_success_manager = false;
613
+ }
614
+
615
+ record.customer_health_score = chance.integer({ min: 1, max: 100 });
616
+ }
617
+
618
+ return record;
619
+ }
620
+ };
621
+
622
+ export default config;
623
+
624
+ /**
625
+ * =================================================================================
626
+ * NEEDLE IN A HAYSTACK - CLOUDFORGE B2B SAAS ANALYTICS
627
+ * =================================================================================
628
+ *
629
+ * A B2B cloud infrastructure monitoring and deployment platform dungeon with 8
630
+ * deliberately architected analytics insights hidden in the data. This dungeon
631
+ * simulates CloudForge - a Datadog + Terraform hybrid serving engineering teams
632
+ * across companies of all sizes.
633
+ *
634
+ * =================================================================================
635
+ * DATASET OVERVIEW
636
+ * =================================================================================
637
+ *
638
+ * - 5,000 users over 100 days
639
+ * - 600K events (5,000 users x 120) across 18 event types (+ 1 hook-created event type)
640
+ * - 8 funnels (onboarding, daily monitoring, incident response, deployment cycle, infrastructure management, team/config management, docs/runbooks, billing)
641
+ * - Group analytics (companies)
642
+ * - No lookup tables (lookupTables is empty; service and alert IDs appear only as event properties)
643
+ * - Desktop/browser only (B2B SaaS - no mobile devices)
644
+ *
645
+ * =================================================================================
646
+ * THE 8 ARCHITECTED HOOKS
647
+ * =================================================================================
648
+ *
649
+ * Each hook creates a specific, discoverable analytics insight that simulates
650
+ * real-world B2B SaaS behavior patterns. Several hooks use techniques like
651
+ * event removal (splice), event replacement, and module-level closure
652
+ * state tracking via Map objects.
653
+ *
654
+ * ---------------------------------------------------------------------------------
655
+ * 1. END-OF-QUARTER SPIKE (event hook)
656
+ * ---------------------------------------------------------------------------------
657
+ *
658
+ * PATTERN: During days 80-90 of the dataset (end of quarter), billing events
659
+ * shift toward plan upgrades 40% of the time, and team member invitations are
660
+ * duplicated 50% of the time. All affected events are tagged with
661
+ * quarter_end_push: true.
662
+ *
663
+ * HOW TO FIND IT:
664
+ * - Chart "billing event" by event_type, broken down by week
665
+ * - Chart "team member invited" count by day
666
+ * - Filter: quarter_end_push = true
667
+ * - Compare: days 80-90 vs. rest of dataset
668
+ *
669
+ * EXPECTED INSIGHT: Clear spike in plan_upgraded billing events and team
670
+ * invitations during days 80-90 of the 100-day dataset. Duplicate invitations create an
671
+ * artificially inflated invite count.
672
+ *
673
+ * REAL-WORLD ANALOGUE: End-of-quarter sales pushes, budget utilization
674
+ * deadlines, and team expansion before fiscal year-end.
675
+ *
676
+ * ---------------------------------------------------------------------------------
677
+ * 2. CHURNED ACCOUNT SILENCING (everything hook)
678
+ * ---------------------------------------------------------------------------------
679
+ *
680
+ * PATTERN: ~10% of users (character-code sum of user_id, or device_id if absent, modulo 10) go completely
681
+ * silent after day 30. ALL of their events after month 1 are removed via
682
+ * splice() - they simply vanish from the dataset.
683
+ *
684
+ * HOW TO FIND IT:
685
+ * - Chart: unique users per week
686
+ * - Retention analysis: D30 retention by cohort
687
+ * - Compare: users active in month 1 vs. month 2
688
+ * - Look for users with events ONLY in the first 30 days
689
+ *
690
+ * EXPECTED INSIGHT: A distinct cohort of ~300 users with activity exclusively
691
+ * in the first month. No gradual decline - a hard cutoff at day 30.
692
+ *
693
+ * REAL-WORLD ANALOGUE: Trial users who never convert, accounts that churn
694
+ * after initial evaluation period, or companies that lose budget approval.
695
+ *
696
+ * ---------------------------------------------------------------------------------
697
+ * 3. ALERT ESCALATION REPLACEMENT (event hook)
698
+ * ---------------------------------------------------------------------------------
699
+ *
700
+ * PATTERN: When an "alert triggered" event has severity "critical" or
701
+ * "emergency", there is a 30% chance the event is REPLACED entirely with a
702
+ * new event type: "incident created". This event type does NOT exist in the
703
+ * events array - it only appears because of hooks.
704
+ *
705
+ * HOW TO FIND IT:
706
+ * - Look for "incident created" events in the dataset (surprise event type)
707
+ * - Correlate: incident created events have escalation_level (P1, P2),
708
+ * teams_paged, incident_id, and auto_escalated: true
709
+ * - Compare: ratio of critical/emergency alerts to incident creations
710
+ *
711
+ * EXPECTED INSIGHT: Approximately 30% of critical/emergency alerts escalate
712
+ * into formal incidents. The "incident created" event is a hidden event type
713
+ * that analysts must discover through exploration.
714
+ *
715
+ * REAL-WORLD ANALOGUE: Automated escalation systems that create incident
716
+ * records from high-severity alerts (PagerDuty, OpsGenie workflows).
717
+ *
718
+ * ---------------------------------------------------------------------------------
719
+ * 4. INTEGRATION USERS SUCCEED (everything hook)
720
+ * ---------------------------------------------------------------------------------
721
+ *
722
+ * PATTERN: Users who have configured BOTH Slack AND PagerDuty integrations
723
+ * respond to and resolve alerts significantly faster:
724
+ * - "alert acknowledged" response_time_mins reduced by 60%
725
+ * - "alert resolved" resolution_time_mins reduced by 50%
726
+ * - Affected events tagged with integrated_team: true
727
+ *
728
+ * HOW TO FIND IT:
729
+ * - Segment users by: has "integration configured" for both "slack" AND "pagerduty"
730
+ * - Compare: average response_time_mins on alert acknowledged
731
+ * - Compare: average resolution_time_mins on alert resolved
732
+ * - Filter: integrated_team = true
733
+ *
734
+ * EXPECTED INSIGHT: Users with both integrations have median response time
735
+ * ~60% lower than baseline. This is a two-feature combination effect.
736
+ *
737
+ * REAL-WORLD ANALOGUE: Tool integration stacks that compound productivity
738
+ * (e.g., CI/CD + monitoring + alerting creating faster MTTR).
739
+ *
740
+ * ---------------------------------------------------------------------------------
741
+ * 5. DOCS READERS DEPLOY MORE (everything hook)
742
+ * ---------------------------------------------------------------------------------
743
+ *
744
+ * PATTERN: Users who view "best_practices" documentation 3 or more times get
745
+ * 2-3 extra "service deployed" events with environment: "production" spliced
746
+ * into their event stream. Tagged with docs_informed: true.
747
+ *
748
+ * HOW TO FIND IT:
749
+ * - Segment users by: count of "documentation viewed" where doc_section = "best_practices" >= 3
750
+ * - Compare: count of "service deployed" where environment = "production"
751
+ * - Filter: docs_informed = true
752
+ *
753
+ * EXPECTED INSIGHT: Users who read best practices documentation 3+ times
754
+ * deploy more services to production, suggesting docs drive confidence
755
+ * and adoption.
756
+ *
757
+ * REAL-WORLD ANALOGUE: Documentation engagement as a leading indicator of
758
+ * product adoption (developer tools where docs reading predicts usage).
759
+ *
760
+ * ---------------------------------------------------------------------------------
761
+ * 6. COST OVERRUN PATTERN (event hook - closure state)
762
+ * ---------------------------------------------------------------------------------
763
+ *
764
+ * PATTERN: When a "cost report generated" event has cost_change_percent > 25,
765
+ * the user is stored in a module-level Map. When that same user later triggers
766
+ * an "infrastructure scaled" event, the scale_direction is forced to "down"
767
+ * (cost-cutting reaction). Uses closure-based state tracking across separate
768
+ * hook calls.
769
+ *
770
+ * HOW TO FIND IT:
771
+ * - Filter: "cost report generated" where budget_exceeded = true
772
+ * - Correlate: subsequent "infrastructure scaled" where cost_reaction = true
773
+ * - Compare: scale_direction distribution for cost_reaction users vs. others
774
+ *
775
+ * EXPECTED INSIGHT: Users who experience cost overruns (>25% increase)
776
+ * consistently scale down their infrastructure afterward. The Map-based
777
+ * tracking creates a causal chain across separate events.
778
+ *
779
+ * REAL-WORLD ANALOGUE: Cloud cost management behavior - teams that exceed
780
+ * budgets immediately react by reducing resource allocation.
781
+ *
782
+ * ---------------------------------------------------------------------------------
783
+ * 7. FAILED DEPLOYMENT RECOVERY (event hook - closure state)
784
+ * ---------------------------------------------------------------------------------
785
+ *
786
+ * PATTERN: When a deployment pipeline fails, the user is stored in a
787
+ * module-level Map. Their next successful deployment has duration_sec
788
+ * multiplied by 1.5 (recovery deploys are slower/more careful). Tagged
789
+ * with recovery_deployment: true.
790
+ *
791
+ * HOW TO FIND IT:
792
+ * - Filter: "deployment pipeline run" where recovery_deployment = true
793
+ * - Compare: average duration_sec for recovery vs. normal deployments
794
+ * - Sequence: look for failed -> success pairs per user
795
+ *
796
+ * EXPECTED INSIGHT: Recovery deployments after failures take 50% longer
797
+ * than normal deployments, reflecting more cautious deployment practices.
798
+ *
799
+ * REAL-WORLD ANALOGUE: Post-incident deployment behavior - engineers take
800
+ * extra care after a failed deploy, adding more checks and review steps.
801
+ *
802
+ * ---------------------------------------------------------------------------------
803
+ * 8. ENTERPRISE VS STARTUP (user hook)
804
+ * ---------------------------------------------------------------------------------
805
+ *
806
+ * PATTERN: Based on company_size, users get additional profile properties:
807
+ * - enterprise: seat_count (50-500), annual_contract_value (50K-500K), customer_success_manager: true
808
+ * - mid_market: seat_count (10-50), annual_contract_value (12K-50K)
809
+ * - smb: seat_count (3-10), annual_contract_value (3.6K-12K)
810
+ * - startup: seat_count (1-5), annual_contract_value (0-3.6K)
811
+ * - All users get customer_health_score (1-100)
812
+ *
813
+ * HOW TO FIND IT:
814
+ * - Segment users by: company_size
815
+ * - Compare: annual_contract_value distribution
816
+ * - Compare: seat_count ranges
817
+ * - Filter: customer_success_manager = true (enterprise only)
818
+ *
819
+ * EXPECTED INSIGHT: Clear segmentation of user base by company size with
820
+ * corresponding ACV and seat count distributions. Enterprise customers
821
+ * uniquely have dedicated CSMs.
822
+ *
823
+ * REAL-WORLD ANALOGUE: B2B SaaS customer segmentation where company size
824
+ * directly determines contract value, support tier, and expansion potential.
825
+ *
826
+ * =================================================================================
827
+ * ADVANCED ANALYSIS IDEAS
828
+ * =================================================================================
829
+ *
830
+ * CROSS-HOOK PATTERNS:
831
+ *
832
+ * 1. Churned + Enterprise: Do churned accounts (Hook #2) skew toward startups
833
+ * or are enterprise accounts also silenced? Cross-reference company_size
834
+ * with the ~10% churn cohort.
835
+ *
836
+ * 2. Integration + Cost: Do teams with full integrations (Hook #4) also manage
837
+ * costs better (Hook #6)? Integrated teams may detect cost overruns faster.
838
+ *
839
+ * 3. Docs + Deploys + Failures: Do docs readers (Hook #5) have fewer failed
840
+ * deployments (Hook #7)? Best practices readers may deploy more carefully.
841
+ *
842
+ * 4. Quarter Spike + Churn: Are quarter-end upgrades (Hook #1) correlated with
843
+ * accounts that later churn? False expansion before abandonment.
844
+ *
845
+ * 5. Enterprise Recovery: Do enterprise customers (Hook #8) recover from failed
846
+ * deployments (Hook #7) differently than startups?
847
+ *
848
+ * COHORT ANALYSIS:
849
+ *
850
+ * - Cohort by company_size: Compare all metrics across startup/smb/mid_market/enterprise
851
+ * - Cohort by plan_tier: Free vs. Team vs. Business vs. Enterprise engagement
852
+ * - Cohort by cloud_provider: AWS vs. GCP vs. Azure deployment and alert patterns
853
+ * - Cohort by primary_role: Engineer vs. SRE vs. DevOps vs. Manager behaviors
854
+ *
855
+ * FUNNEL ANALYSIS:
856
+ *
857
+ * - Onboarding: workspace created -> service deployed -> dashboard viewed
858
+ * Compare by company_size and plan_tier
859
+ * - Incident Response: alert triggered -> acknowledged -> resolved
860
+ * Compare integrated_team vs. non-integrated response times
861
+ * - Deployment: pipeline run -> service deployed -> dashboard viewed
862
+ * Compare recovery_deployment vs. normal deployment success
863
+ *
864
+ * KEY METRICS TO TRACK:
865
+ *
866
+ * - MTTR (Mean Time To Resolve): alert triggered -> alert resolved duration
867
+ * - Deployment Frequency: service deployed events per user per week
868
+ * - Deployment Success Rate: pipeline success vs. failure ratio
869
+ * - Cost Efficiency: total_cost trend over time per company
870
+ * - Feature Adoption: integration configured events by type
871
+ * - Documentation Engagement: documentation viewed by section
872
+ *
873
+ * =================================================================================
874
+ * EXPECTED METRICS SUMMARY
875
+ * =================================================================================
876
+ *
877
+ * Hook | Metric | Baseline | Hook Effect | Ratio
878
+ * -------------------------|--------------------------|-----------|----------------|------
879
+ * End-of-Quarter Spike | Plan upgrades/day | ~2/day | ~8/day | 4x
880
+ * Churned Accounts | Users active month 2 | 100% | 90% | 0.9x
881
+ * Alert Escalation | Incidents from alerts | 0% | ~30% of crit | new
882
+ * Integration Users | MTTR (minutes) | ~300 | ~150 | 0.5x
883
+ * Docs Readers | Prod deploys/user | ~3 | ~5-6 | 1.8x
884
+ * Cost Overrun | Scale-down after overrun | 50% | 100% | 2x
885
+ * Failed Deploy Recovery | Deploy duration (sec) | ~500 | ~750 | 1.5x
886
+ * Enterprise vs Startup | ACV range | $0-3.6K | $50K-500K | 100x+
887
+ *
888
+ * =================================================================================
889
+ * HOW TO RUN THIS DUNGEON
890
+ * =================================================================================
891
+ *
892
+ * From the dm4 root directory:
893
+ *
894
+ * npm start
895
+ *
896
+ * Or programmatically:
897
+ *
898
+ * import generate from './index.js';
899
+ * import config from './dungeons/sass.js';
900
+ * const results = await generate(config);
901
+ *
902
+ * =================================================================================
903
+ * TECHNICAL NOTES
904
+ * =================================================================================
905
+ *
906
+ * - Module-level Maps (costOverrunUsers, failedDeployUsers) provide closure-based
907
+ * state tracking across individual event hook calls. This is the key differentiator
908
+ * for this dungeon - hooks 6 and 7 maintain state between separate invocations.
909
+ *
910
+ * - Hook #3 creates "incident created" events that do NOT exist in the events array.
911
+ * This event type only appears because of hook-based event replacement.
912
+ *
913
+ * - Hook #2 uses splice() in the "everything" handler to remove events after day 30
914
+ * for ~10% of users. This creates accounts with a hard activity cutoff - complete silence.
915
+ *
916
+ * - The "everything" hooks (2, 4, and 5) operate on the full user event array, enabling
917
+ * two-pass analysis: first identify patterns, then modify events accordingly.
918
+ *
919
+ * - Desktop/browser only: hasAndroidDevices and hasIOSDevices are both false,
920
+ * reflecting the B2B SaaS reality that CloudForge is used from workstations.
921
+ *
922
+ * =================================================================================
923
+ */