make-mp-data 3.0.4 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/README.md +46 -0
  2. package/dungeons/array-of-object-lookup-schema.json +327 -0
  3. package/dungeons/array-of-object-lookup.js +28 -8
  4. package/dungeons/capstone/capstone-ic3.js +291 -0
  5. package/dungeons/capstone/capstone-ic4.js +598 -0
  6. package/dungeons/capstone/capstone-ic5.js +668 -0
  7. package/dungeons/capstone/generate-product-lookup.js +309 -0
  8. package/dungeons/ecommerce-schema.json +462 -0
  9. package/dungeons/{copilot.js → ecommerce.js} +77 -15
  10. package/dungeons/education-schema.json +2409 -0
  11. package/dungeons/education.js +206 -442
  12. package/dungeons/fintech-schema.json +14034 -0
  13. package/dungeons/fintech.js +110 -389
  14. package/dungeons/foobar-schema.json +403 -0
  15. package/dungeons/foobar.js +27 -4
  16. package/dungeons/food-delivery-schema.json +192 -0
  17. package/dungeons/food-delivery.js +602 -0
  18. package/dungeons/food-schema.json +1152 -0
  19. package/dungeons/food.js +150 -383
  20. package/dungeons/gaming-schema.json +1270 -0
  21. package/dungeons/gaming.js +143 -3
  22. package/dungeons/insurance-application-schema.json +204 -0
  23. package/dungeons/insurance-application.js +605 -0
  24. package/dungeons/media-schema.json +906 -0
  25. package/dungeons/media.js +221 -391
  26. package/dungeons/retention-cadence-schema.json +78 -0
  27. package/dungeons/retention-cadence.js +35 -1
  28. package/dungeons/rpg-schema.json +4526 -0
  29. package/dungeons/rpg.js +130 -388
  30. package/dungeons/sanity-schema.json +255 -0
  31. package/dungeons/sanity.js +21 -10
  32. package/dungeons/sass-schema.json +1291 -0
  33. package/dungeons/sass.js +210 -337
  34. package/dungeons/scd-schema.json +919 -0
  35. package/dungeons/scd.js +38 -10
  36. package/dungeons/simple-schema.json +608 -0
  37. package/dungeons/simple.js +48 -11
  38. package/dungeons/simplest-schema.json +1418 -0
  39. package/dungeons/simplest.js +392 -0
  40. package/dungeons/social-schema.json +1118 -0
  41. package/dungeons/social.js +124 -365
  42. package/dungeons/text-generation-schema.json +3096 -0
  43. package/dungeons/text-generation.js +71 -0
  44. package/index.js +6 -3
  45. package/lib/core/config-validator.js +18 -0
  46. package/lib/core/storage.js +5 -5
  47. package/lib/generators/events.js +4 -4
  48. package/lib/orchestrators/mixpanel-sender.js +12 -7
  49. package/lib/orchestrators/user-loop.js +14 -6
  50. package/lib/templates/soup-presets.js +188 -0
  51. package/lib/utils/utils.js +52 -6
  52. package/package.json +1 -1
  53. package/types.d.ts +20 -3
  54. package/dungeons/adspend.js +0 -117
  55. package/dungeons/anon.js +0 -128
  56. package/dungeons/benchmark-heavy.js +0 -240
  57. package/dungeons/benchmark-light.js +0 -126
  58. package/dungeons/big.js +0 -226
  59. package/dungeons/business.js +0 -391
  60. package/dungeons/complex.js +0 -428
  61. package/dungeons/experiments.js +0 -137
  62. package/dungeons/funnels.js +0 -309
  63. package/dungeons/mil.js +0 -323
  64. package/dungeons/mirror.js +0 -160
  65. package/dungeons/soup-test.js +0 -52
  66. package/dungeons/streaming.js +0 -372
  67. package/dungeons/strict-event-test.js +0 -30
  68. package/dungeons/student-teacher.js +0 -438
  69. package/dungeons/too-big-events.js +0 -203
  70. package/dungeons/user-agent.js +0 -209
@@ -12,6 +12,77 @@ const days = 92;
12
12
 
13
13
  /** @typedef {import("../types.js").Dungeon} Dungeon */
14
14
 
15
+ /*
16
+ * ============================================================================
17
+ * DATASET OVERVIEW
18
+ * ============================================================================
19
+ *
20
+ * App: Text Generation Demo — showcases DM4's organic text generation
21
+ * Scale: 8,000 users, ~960K events, 92 days
22
+ *
23
+ * A text-heavy SaaS analytics dungeon that exercises every text generation
24
+ * style available in DM4. Users interact through support tickets, product
25
+ * reviews, forum posts, search queries, chat messages, social media posts
26
+ * (Twitter, LinkedIn, Reddit), bug reports, feature requests, onboarding
27
+ * feedback, charity/wedding comments, and webinar chat.
28
+ *
29
+ * Each event type uses a dedicated createTextGenerator() configured with its
30
+ * own style, tone, formality, keyword banks, typo rates, and authenticity
31
+ * levels. Text generator styles used: support, review, forum, search,
32
+ * feedback, chat, email, tweet, comments (9 distinct styles).
33
+ *
34
+ * Events:
35
+ * social_media_tweet (15) > chat_message (10) > charity_comment_posted (8)
36
+ * > wedding_comment_posted (6) > company_announcement_tweet (3)
37
+ * > all others (1 each): support ticket, review, forum post, search,
38
+ * feedback, email, twitter, linkedin, reddit, bug report, feature
39
+ * request, onboarding, tutorial comment, webinar chat, api thread
40
+ * ============================================================================
41
+ */
42
+
43
+ /*
44
+ * ============================================================================
45
+ * ANALYTICS HOOKS
46
+ * ============================================================================
47
+ *
48
+ * Hook 1: Power User + Churn Risk Classification
49
+ * Type: user
50
+ * What: Users with engagement_score > 70 get is_power_user = true.
51
+ * Users inactive > 20 days get risk_level = "high_churn", else "healthy".
52
+ * Mixpanel report:
53
+ * - Insights > user profile breakdown by "is_power_user"
54
+ * - Insights > user profile breakdown by "risk_level", cross-reference
55
+ * with "user_tier" to see churn risk by plan
56
+ *
57
+ * Hook 2: Critical Ticket Auto-Escalation
58
+ * Type: event
59
+ * What: enterprise_support_ticket events with priority = "critical" get
60
+ * escalation_level bumped by 1 (max 3) and auto_escalated = true.
61
+ * Mixpanel report:
62
+ * - Insights > "enterprise_support_ticket" total events, breakdown by
63
+ * auto_escalated
64
+ * - Expect: critical tickets show auto_escalated = true
65
+ *
66
+ * Hook 3: Critical Bug Flagging
67
+ * Type: event
68
+ * What: bug_report_submitted events with severity = "critical" AND
69
+ * is_reproducible = true get requires_immediate_review = true and a
70
+ * random estimated_fix_hours (1-8).
71
+ * Mixpanel report:
72
+ * - Insights > "bug_report_submitted" total events, breakdown by
73
+ * requires_immediate_review
74
+ * - Expect: only critical + reproducible bugs are flagged
75
+ *
76
+ * Hook 4: Enterprise Satisfaction Survey Injection
77
+ * Type: everything
78
+ * What: Enterprise-tier users with > 5 events get a
79
+ * "satisfaction_survey_triggered" event appended with a 1-10 NPS score.
80
+ * Mixpanel report:
81
+ * - Insights > "satisfaction_survey_triggered" AVG(score), breakdown by
82
+ * product_tier
83
+ * - Expect: only enterprise users have survey events
84
+ * ============================================================================
85
+ */
15
86
 
16
87
  // Enterprise support ticket generator with keywords and high authenticity
17
88
  const enterpriseSupportGen = createTextGenerator({
package/index.js CHANGED
@@ -197,13 +197,16 @@ async function main(config) {
197
197
 
198
198
  // ! DATA GENERATION ENDS HERE
199
199
 
200
+ // Flush when writeToDisk is enabled OR batch mode activated (to capture tail data)
201
+ const shouldFlush = validatedConfig.writeToDisk || context.isBatchMode();
202
+
200
203
  // Step 10: Flush lookup tables to disk (always as CSVs)
201
- if (validatedConfig.writeToDisk) {
204
+ if (shouldFlush) {
202
205
  await flushLookupTablesToDisk(storage, validatedConfig);
203
206
  }
204
207
 
205
- // Step 11: Flush other storage containers to disk (if writeToDisk enabled)
206
- if (validatedConfig.writeToDisk) {
208
+ // Step 11: Flush other storage containers to disk
209
+ if (shouldFlush) {
207
210
  await flushStorageToDisk(storage, validatedConfig);
208
211
  }
209
212
 
@@ -11,6 +11,7 @@
11
11
  import dayjs from "dayjs";
12
12
  import { makeName } from "ak-tools";
13
13
  import * as u from "../utils/utils.js";
14
+ import { resolveSoup } from "../templates/soup-presets.js";
14
15
 
15
16
  /**
16
17
  * Infers funnels from the provided events
@@ -136,6 +137,12 @@ export function validateDungeonConfig(config) {
136
137
  concurrency = 1;
137
138
  }
138
139
 
140
+ // Auto-enable batch mode for large datasets to prevent OOM
141
+ if (numEvents >= 2_000_000 && config.batchSize === undefined) {
142
+ batchSize = 1_000_000;
143
+ console.warn(`⚠️ Auto-enabling batch mode: numEvents (${numEvents.toLocaleString()}) >= 2M. Using batchSize of ${batchSize.toLocaleString()}.`);
144
+ }
145
+
139
146
  // Ensure defaults for deep objects
140
147
  if (!config.superProps) config.superProps = superProps;
141
148
  if (!config.userProps || Object.keys(config?.userProps || {})) config.userProps = userProps;
@@ -148,6 +155,17 @@ export function validateDungeonConfig(config) {
148
155
  throw new Error("Either epochStart or numDays must be provided");
149
156
  }
150
157
 
158
+ // Resolve soup presets (must happen after numDays is computed)
159
+ const resolved = resolveSoup(soup, numDays);
160
+ soup = resolved.soup;
161
+ // Apply suggested birth distribution params if not explicitly set by the dungeon
162
+ if (resolved.suggestedBornRecentBias !== undefined && config.bornRecentBias === undefined) {
163
+ config.bornRecentBias = resolved.suggestedBornRecentBias;
164
+ }
165
+ if (resolved.suggestedPercentUsersBornInDataset !== undefined && config.percentUsersBornInDataset === undefined) {
166
+ config.percentUsersBornInDataset = resolved.suggestedPercentUsersBornInDataset;
167
+ }
168
+
151
169
  // Use provided name if non-empty string, otherwise generate one
152
170
  if (!name || name === "") {
153
171
  name = makeName();
@@ -366,12 +366,12 @@ export class StorageManager {
366
366
  if (config.writeToDisk === false) {
367
367
  const batchSize = config.batchSize || 1_000_000;
368
368
  const numEvents = config.numEvents || 0;
369
-
369
+
370
370
  if (batchSize < numEvents) {
371
- throw new Error(
372
- `Configuration error: writeToDisk is explicitly set to false but batchSize (${batchSize}) is lower than numEvents (${numEvents}). ` +
373
- `This would result in data loss as batched data would be discarded. ` +
374
- `Either set writeToDisk to true, increase batchSize to be >= numEvents, or provide a Mixpanel token to send data directly.`
371
+ console.warn(
372
+ `⚠️ writeToDisk is false but batchSize (${batchSize.toLocaleString()}) < numEvents (${numEvents.toLocaleString()}). ` +
373
+ `Batch files will be written to disk temporarily to avoid OOM. ` +
374
+ `They will be cleaned up after Mixpanel import if a token is provided.`
375
375
  );
376
376
  }
377
377
  }
@@ -52,9 +52,9 @@ export async function makeEvent(
52
52
  const chance = u.getChance();
53
53
 
54
54
  // Extract soup configuration for time distribution
55
- // Dynamic peaks: one per week for long ranges, minimum 5
56
- const defaultPeaks = Math.max(5, Math.ceil((config.numDays || 30) / 7));
57
- const { mean = 0, deviation = 2, peaks = defaultPeaks } = config.soup || {};
55
+ // Dynamic peaks: enough to flatten DOW interference from chunk boundaries
56
+ const defaultPeaks = Math.max(5, (config.numDays || 30) * 2);
57
+ const { mean = 0, deviation = 2, peaks = defaultPeaks, dayOfWeekWeights, hourOfDayWeights } = /** @type {import('../../types').SoupConfig} */ (config.soup) || {};
58
58
 
59
59
  // Extract feature flags from config
60
60
  const {
@@ -102,7 +102,7 @@ export async function makeEvent(
102
102
  shiftedTimestamp = earliestTime + context.TIME_SHIFT_SECONDS;
103
103
  } else {
104
104
  // TimeSoup returns unix seconds; shift and convert to ISO once
105
- const soupTimestamp = u.TimeSoup(earliestTime, context.FIXED_NOW, peaks, deviation, mean);
105
+ const soupTimestamp = u.TimeSoup(earliestTime, context.FIXED_NOW, peaks, deviation, mean, dayOfWeekWeights, hourOfDayWeights, context.TIME_SHIFT_SECONDS);
106
106
  shiftedTimestamp = soupTimestamp + context.TIME_SHIFT_SECONDS;
107
107
  }
108
108
  // Drop events that would land in the future (Mixpanel rewrites these to "now", causing pile-ups)
@@ -233,15 +233,20 @@ export async function sendToMixpanel(context) {
233
233
  if (!writeToDisk && isBATCH_MODE) {
234
234
  const writeDir = eventData?.getWriteDir?.() || userProfilesData?.getWriteDir?.();
235
235
  if (writeDir) {
236
+ const configName = context.config.name;
236
237
  const listDir = await ls(writeDir);
237
238
  // @ts-ignore
238
- const files = listDir.filter(f =>
239
- f.includes('-EVENTS') ||
240
- f.includes('-USERS') ||
241
- f.includes('-ADSPEND') ||
242
- f.includes('-GROUPS') ||
243
- f.includes('-GROUP-EVENTS')
244
- );
239
+ const files = listDir.filter(f => {
240
+ if (configName && !f.includes(configName)) return false;
241
+ return f.includes('-EVENTS') ||
242
+ f.includes('-USERS') ||
243
+ f.includes('-ADSPEND') ||
244
+ f.includes('-GROUPS') ||
245
+ f.includes('-GROUP-EVENTS') ||
246
+ f.includes('-SCD') ||
247
+ f.includes('-MIRROR') ||
248
+ f.includes('-LOOKUP');
249
+ });
245
250
  for (const file of files) {
246
251
  await rm(file);
247
252
  }
@@ -59,6 +59,7 @@ export async function userLoop(context) {
59
59
  let cancelled = false;
60
60
  const onSigint = () => {
61
61
  cancelled = true;
62
+ USER_CONN.clearQueue();
62
63
  if (verbose) console.log(`\n\nStopping generation (Ctrl+C)...\n`);
63
64
  };
64
65
  process.on('SIGINT', onSigint);
@@ -109,7 +110,7 @@ export async function userLoop(context) {
109
110
  if (userIsBornInDataset) {
110
111
  let biasedCreated = dayjs(created).subtract(daysShift, 'd');
111
112
 
112
- if (bornRecentBias > 0) {
113
+ if (bornRecentBias !== 0) {
113
114
  // Calculate how far into the dataset this user was born (0 = start, 1 = end/recent)
114
115
  const datasetStart = dayjs.unix(global.FIXED_BEGIN);
115
116
  const datasetEnd = dayjs.unix(context.FIXED_NOW);
@@ -117,10 +118,17 @@ export async function userLoop(context) {
117
118
  // Clamp userPosition to [0, 1] to handle edge cases from rounding in time calculations
118
119
  const userPosition = Math.max(0, Math.min(1, biasedCreated.diff(datasetStart) / totalDuration));
119
120
 
120
- // Apply power function to bias toward recent (higher values)
121
- // exponent < 1 shifts distribution toward 1 (recent)
122
- const exponent = 1 - (bornRecentBias * 0.7); // 0.3 bias -> 0.79 exponent (gentle nudge)
123
- const biasedPosition = Math.pow(userPosition, exponent);
121
+ let biasedPosition;
122
+ if (bornRecentBias > 0) {
123
+ // Positive bias: exponent < 1 shifts distribution toward 1 (recent)
124
+ const exponent = 1 - (bornRecentBias * 0.7); // 0.3 bias -> 0.79 exponent (gentle nudge)
125
+ biasedPosition = Math.pow(userPosition, exponent);
126
+ } else {
127
+ // Negative bias: mirror the power function to shift toward 0 (early)
128
+ // -0.3 bias -> 0.79 exponent applied to (1 - position), then mirrored back
129
+ const exponent = 1 - (Math.abs(bornRecentBias) * 0.7);
130
+ biasedPosition = 1 - Math.pow(1 - userPosition, exponent);
131
+ }
124
132
 
125
133
  // Convert back to timestamp
126
134
  biasedCreated = datasetStart.add(biasedPosition * totalDuration, 'millisecond');
@@ -233,7 +241,7 @@ export async function userLoop(context) {
233
241
 
234
242
  // ALL SUBSEQUENT EVENTS (funnels for converted users, standalone for all)
235
243
  let userChurned = false;
236
- while (numEventsPreformed < numEventsThisUserWillPreform) {
244
+ while (numEventsPreformed < numEventsThisUserWillPreform && !cancelled) {
237
245
  let newEvents;
238
246
  if (usageFunnels.length && userConverted) {
239
247
  const currentFunnel = chance.pickone(usageFunnels);
@@ -0,0 +1,188 @@
1
+ /**
2
+ * TimeSoup preset configurations
3
+ * Each preset defines time distribution parameters that produce distinct patterns.
4
+ *
5
+ * Parameters:
6
+ * - peaks(numDays): function returning number of Gaussian clusters
7
+ * - deviation: controls peak width (higher = tighter)
8
+ * - mean: offset from chunk center (0 = centered)
9
+ * - dayOfWeekWeights: 7-element array [Sun..Sat], max=1.0, null to disable
10
+ * - hourOfDayWeights: 24-element array [0h..23h UTC], max=1.0, null to disable
11
+ *
12
+ * Some presets also suggest bornRecentBias and percentUsersBornInDataset,
13
+ * but those are top-level dungeon config — presets only set them if not already specified.
14
+ */
15
+
16
+ // Real-world Mixpanel DOW pattern: weekday-heavy, Saturday valley
17
+ export const REAL_DOW = [0.637, 1.0, 0.999, 0.998, 0.966, 0.802, 0.528];
18
+
19
+ // Real-world Mixpanel HOD pattern: early-morning peak (UTC), afternoon valley
20
+ export const REAL_HOD = [
21
+ 0.949, 0.992, 0.998, 0.946, 0.895, 0.938, 1.0, 0.997,
22
+ 0.938, 0.894, 0.827, 0.786, 0.726, 0.699, 0.688, 0.643,
23
+ 0.584, 0.574, 0.554, 0.576, 0.604, 0.655, 0.722, 0.816
24
+ ];
25
+
26
+ // Flat weights (no cyclical pattern)
27
+ export const FLAT_DOW = [1, 1, 1, 1, 1, 1, 1];
28
+ export const FLAT_HOD = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
29
+
30
+ /** @type {Record<string, {peaks: (numDays: number) => number, deviation: number, mean: number, dayOfWeekWeights: number[]|null, hourOfDayWeights: number[]|null, bornRecentBias?: number, percentUsersBornInDataset?: number}>} */
31
+ export const SOUP_PRESETS = {
32
+ /**
33
+ * steady — Mature SaaS / Stable Product
34
+ * Nearly flat day-over-day, slight weekly pattern, minimal growth trend.
35
+ */
36
+ steady: {
37
+ peaks: (numDays) => Math.max(5, numDays * 2),
38
+ deviation: 1.5,
39
+ mean: 0,
40
+ dayOfWeekWeights: REAL_DOW,
41
+ hourOfDayWeights: REAL_HOD,
42
+ bornRecentBias: 0.1,
43
+ percentUsersBornInDataset: 10,
44
+ },
45
+
46
+ /**
47
+ * growth — Growing Startup (DEFAULT)
48
+ * Gradual uptrend with visible weekly peaks. This is the default behavior.
49
+ */
50
+ growth: {
51
+ peaks: (numDays) => Math.max(5, numDays * 2),
52
+ deviation: 2,
53
+ mean: 0,
54
+ dayOfWeekWeights: REAL_DOW,
55
+ hourOfDayWeights: REAL_HOD,
56
+ bornRecentBias: 0.3,
57
+ percentUsersBornInDataset: 15,
58
+ },
59
+
60
+ /**
61
+ * spiky — Event-Driven / Bursty
62
+ * Clear peaks and valleys, dramatic variation. Fewer Gaussian clusters + tight deviation.
63
+ */
64
+ spiky: {
65
+ peaks: (numDays) => Math.max(5, Math.ceil(numDays / 10)),
66
+ deviation: 3.5,
67
+ mean: 0,
68
+ dayOfWeekWeights: REAL_DOW,
69
+ hourOfDayWeights: REAL_HOD,
70
+ bornRecentBias: 0.3,
71
+ percentUsersBornInDataset: 20,
72
+ },
73
+
74
+ /**
75
+ * seasonal — Strong Cyclical Patterns
76
+ * 3-4 major waves across the dataset. Very few peaks create dramatic macro trends.
77
+ */
78
+ seasonal: {
79
+ peaks: () => 4,
80
+ deviation: 2.5,
81
+ mean: 0,
82
+ dayOfWeekWeights: REAL_DOW,
83
+ hourOfDayWeights: REAL_HOD,
84
+ bornRecentBias: 0.2,
85
+ percentUsersBornInDataset: 25,
86
+ },
87
+
88
+ /**
89
+ * global — Distributed Users Across Timezones
90
+ * Very flat hourly + daily distribution. No cyclical patterns.
91
+ */
92
+ global: {
93
+ peaks: (numDays) => Math.max(5, numDays * 2),
94
+ deviation: 1,
95
+ mean: 0,
96
+ dayOfWeekWeights: FLAT_DOW,
97
+ hourOfDayWeights: FLAT_HOD,
98
+ bornRecentBias: 0,
99
+ percentUsersBornInDataset: 10,
100
+ },
101
+
102
+ /**
103
+ * churny — High Churn / Declining Product
104
+ * Flat distribution (no growth trend). All users pre-exist the dataset,
105
+ * so there's no acceleration. Combine with an "everything" hook that
106
+ * filters late events to create a true declining shape.
107
+ */
108
+ churny: {
109
+ peaks: (numDays) => Math.max(5, numDays * 2),
110
+ deviation: 2,
111
+ mean: 0,
112
+ dayOfWeekWeights: REAL_DOW,
113
+ hourOfDayWeights: REAL_HOD,
114
+ bornRecentBias: 0,
115
+ percentUsersBornInDataset: 5,
116
+ },
117
+
118
+ /**
119
+ * chaotic — Unpredictable / Irregular Patterns
120
+ * Few peaks + very tight clustering = dramatic bursts separated by quiet stretches.
121
+ */
122
+ chaotic: {
123
+ peaks: (numDays) => Math.max(3, Math.ceil(numDays / 20)),
124
+ deviation: 4,
125
+ mean: 0,
126
+ dayOfWeekWeights: REAL_DOW,
127
+ hourOfDayWeights: REAL_HOD,
128
+ bornRecentBias: 0.5,
129
+ percentUsersBornInDataset: 40,
130
+ },
131
+ };
132
+
133
+ /** @type {string[]} */
134
+ export const PRESET_NAMES = Object.keys(SOUP_PRESETS);
135
+
136
+ /**
137
+ * Resolves a soup config — handles string presets, preset+overrides, and raw objects.
138
+ * @param {string | object} soup - Soup config from dungeon
139
+ * @param {number} numDays - Number of days in the dataset
140
+ * @returns {{ soup: object, suggestedBornRecentBias?: number, suggestedPercentUsersBornInDataset?: number }}
141
+ */
142
+ export function resolveSoup(soup, numDays) {
143
+ if (!soup) return { soup: {} };
144
+
145
+ // String preset: "growth", "spiky", etc.
146
+ if (typeof soup === 'string') {
147
+ const preset = SOUP_PRESETS[soup];
148
+ if (!preset) {
149
+ throw new Error(`Unknown soup preset: "${soup}". Valid presets: ${PRESET_NAMES.join(', ')}`);
150
+ }
151
+ return {
152
+ soup: {
153
+ peaks: preset.peaks(numDays),
154
+ deviation: preset.deviation,
155
+ mean: preset.mean,
156
+ dayOfWeekWeights: preset.dayOfWeekWeights,
157
+ hourOfDayWeights: preset.hourOfDayWeights,
158
+ },
159
+ suggestedBornRecentBias: preset.bornRecentBias,
160
+ suggestedPercentUsersBornInDataset: preset.percentUsersBornInDataset,
161
+ };
162
+ }
163
+
164
+ // Object with preset key: { preset: "growth", deviation: 3 }
165
+ if (typeof soup === 'object' && soup.preset) {
166
+ const preset = SOUP_PRESETS[soup.preset];
167
+ if (!preset) {
168
+ throw new Error(`Unknown soup preset: "${soup.preset}". Valid presets: ${PRESET_NAMES.join(', ')}`);
169
+ }
170
+ const base = {
171
+ peaks: preset.peaks(numDays),
172
+ deviation: preset.deviation,
173
+ mean: preset.mean,
174
+ dayOfWeekWeights: preset.dayOfWeekWeights,
175
+ hourOfDayWeights: preset.hourOfDayWeights,
176
+ };
177
+ // Apply overrides (excluding the 'preset' key itself)
178
+ const { preset: _, ...overrides } = soup;
179
+ return {
180
+ soup: { ...base, ...overrides },
181
+ suggestedBornRecentBias: preset.bornRecentBias,
182
+ suggestedPercentUsersBornInDataset: preset.percentUsersBornInDataset,
183
+ };
184
+ }
185
+
186
+ // Raw object: { peaks: 10, deviation: 2 } — pass through unchanged
187
+ return { soup };
188
+ }
@@ -1180,7 +1180,17 @@ let soupHits = 0;
1180
1180
  * Divides the range into `peaks` chunks, picks one randomly, then samples within it.
1181
1181
  * Returns unix seconds (not ISO string) for performance — caller converts once.
1182
1182
  */
1183
- function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0) {
1183
+ // Default day-of-week weights (0=Sun, 1=Mon, ..., 6=Sat) derived from real Mixpanel data
1184
+ const DEFAULT_DOW_WEIGHTS = [0.637, 1.0, 0.999, 0.998, 0.966, 0.802, 0.528];
1185
+
1186
+ // Default hour-of-day weights (0=midnight, ..., 23=11pm UTC) — derived from real Mixpanel data
1187
+ const DEFAULT_HOD_WEIGHTS = [
1188
+ 0.949, 0.992, 0.998, 0.946, 0.895, 0.938, 1.0, 0.997,
1189
+ 0.938, 0.894, 0.827, 0.786, 0.726, 0.699, 0.688, 0.643,
1190
+ 0.584, 0.574, 0.554, 0.576, 0.604, 0.655, 0.722, 0.816
1191
+ ];
1192
+
1193
+ function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0, dayOfWeekWeights = DEFAULT_DOW_WEIGHTS, hourOfDayWeights = DEFAULT_HOD_WEIGHTS, timeShiftSeconds = 0) {
1184
1194
  if (!earliestTime) earliestTime = global.FIXED_BEGIN ? global.FIXED_BEGIN : dayjs().subtract(30, 'd').unix();
1185
1195
  if (!latestTime) latestTime = global.FIXED_NOW ? global.FIXED_NOW : dayjs().unix();
1186
1196
  const chance = getChance();
@@ -1193,21 +1203,57 @@ function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0)
1193
1203
  }
1194
1204
  const chunkSize = totalRange / peaks;
1195
1205
 
1196
- // Select a random chunk based on the number of peaks
1206
+ // Phase 1: Gaussian chunk sampling (macro trend across the time range)
1197
1207
  const peakIndex = integer(0, peaks - 1);
1198
1208
  const chunkStart = earliestTime + peakIndex * chunkSize;
1199
1209
  const chunkEnd = chunkStart + chunkSize;
1200
1210
  const chunkMid = (chunkStart + chunkEnd) / 2;
1201
-
1202
- // Generate offset from normal distribution, clamp to chunk boundaries
1203
1211
  const maxDeviation = chunkSize / deviation;
1204
1212
  const offset = chance.normal({ mean: mean, dev: maxDeviation });
1205
1213
  const proposedTime = chunkMid + offset;
1206
1214
  const clampedTime = Math.max(chunkStart, Math.min(chunkEnd, proposedTime));
1207
- const finalTime = Math.max(earliestTime, Math.min(latestTime, clampedTime));
1215
+ let candidate = Math.max(earliestTime, Math.min(latestTime, clampedTime));
1216
+
1217
+ // Phase 2: DOW accept/reject — retry if day-of-week doesn't pass weight check
1218
+ if (dayOfWeekWeights) {
1219
+ for (let attempt = 0; attempt < 50; attempt++) {
1220
+ const dow = new Date((candidate + timeShiftSeconds) * 1000).getUTCDay();
1221
+ if (chance.random() < dayOfWeekWeights[dow]) break;
1222
+ // Rejected — resample from Gaussian chunks
1223
+ const pi = integer(0, peaks - 1);
1224
+ const cs = earliestTime + pi * chunkSize;
1225
+ const ce = cs + chunkSize;
1226
+ const cm = (cs + ce) / 2;
1227
+ const md = chunkSize / deviation;
1228
+ const off = chance.normal({ mean: mean, dev: md });
1229
+ const pt = cm + off;
1230
+ candidate = Math.max(earliestTime, Math.min(latestTime, Math.max(cs, Math.min(ce, pt))));
1231
+ }
1232
+ }
1233
+
1234
+ // Phase 3: Redistribute hour-of-day (changes only hour within same day)
1235
+ if (hourOfDayWeights) {
1236
+ const shifted = candidate + timeShiftSeconds;
1237
+ const d = new Date(shifted * 1000);
1238
+ const currentMinute = d.getUTCMinutes();
1239
+ const currentSecond = d.getUTCSeconds();
1240
+
1241
+ const totalHodWeight = hourOfDayWeights.reduce((s, w) => s + w, 0);
1242
+ let roll = chance.random() * totalHodWeight;
1243
+ let newHour = 0;
1244
+ for (let h = 0; h < 24; h++) {
1245
+ roll -= hourOfDayWeights[h];
1246
+ if (roll <= 0) { newHour = h; break; }
1247
+ }
1248
+
1249
+ const dayStartShifted = Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate()) / 1000;
1250
+ const newShifted = dayStartShifted + newHour * 3600 + currentMinute * 60 + currentSecond;
1251
+ candidate = newShifted - timeShiftSeconds;
1252
+ candidate = Math.max(earliestTime, Math.min(latestTime, candidate));
1253
+ }
1208
1254
 
1209
1255
  soupHits++;
1210
- return finalTime;
1256
+ return candidate;
1211
1257
  }
1212
1258
 
1213
1259
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "make-mp-data",
3
- "version": "3.0.4",
3
+ "version": "3.0.5",
4
4
  "description": "builds all mixpanel primitives for a given project",
5
5
  "type": "module",
6
6
  "main": "index.js",
package/types.d.ts CHANGED
@@ -133,17 +133,34 @@ export type SCDProp = {
133
133
  };
134
134
 
135
135
  /**
136
- * the soup is a set of parameters that determine the distribution of events over time
136
+ * Soup preset names for common time distribution patterns
137
137
  */
138
- type soup = {
138
+ export type SoupPreset = "steady" | "growth" | "spiky" | "seasonal" | "global" | "churny" | "chaotic";
139
+
140
+ /**
141
+ * Soup configuration object for fine-grained control
142
+ */
143
+ export type SoupConfig = {
144
+ /** Use a named preset as base, then override individual fields */
145
+ preset?: SoupPreset;
139
146
  /** Controls clustering tightness. Higher = tighter peaks. Default: 2 */
140
147
  deviation?: number;
141
- /** Number of time clusters to distribute events across. Default: dynamic (numDays/7, minimum 5) */
148
+ /** Number of time clusters to distribute events across. Default: dynamic (numDays * 2, minimum 5) */
142
149
  peaks?: number;
143
150
  /** Offset for the normal distribution center within each peak. Default: 0 */
144
151
  mean?: number;
152
+ /** Day-of-week weights (7 elements, index 0=Sunday). Normalized max=1.0. Set null to disable. */
153
+ dayOfWeekWeights?: number[] | null;
154
+ /** Hour-of-day weights (24 elements, index 0=midnight UTC). Normalized max=1.0. Set null to disable. */
155
+ hourOfDayWeights?: number[] | null;
145
156
  };
146
157
 
158
+ /**
159
+ * the soup is a set of parameters that determine the distribution of events over time.
160
+ * Can be a preset name string, a config object, or a config object with a preset base.
161
+ */
162
+ type soup = SoupPreset | SoupConfig;
163
+
147
164
  /**
148
165
  * Hook types and when they fire (in order per user):
149
166
  * - "user" — user profile object (mutate in-place, return ignored)