make-mp-data 3.0.3 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/README.md +46 -0
  2. package/dungeons/array-of-object-lookup-schema.json +327 -0
  3. package/dungeons/array-of-object-lookup.js +29 -9
  4. package/dungeons/capstone/capstone-ic3.js +291 -0
  5. package/dungeons/capstone/capstone-ic4.js +598 -0
  6. package/dungeons/capstone/capstone-ic5.js +668 -0
  7. package/dungeons/capstone/generate-product-lookup.js +309 -0
  8. package/dungeons/ecommerce-schema.json +462 -0
  9. package/dungeons/{copilot.js → ecommerce.js} +79 -17
  10. package/dungeons/education-schema.json +2409 -0
  11. package/dungeons/education.js +226 -462
  12. package/dungeons/fintech-schema.json +14034 -0
  13. package/dungeons/fintech.js +134 -413
  14. package/dungeons/foobar-schema.json +403 -0
  15. package/dungeons/foobar.js +27 -4
  16. package/dungeons/food-delivery-schema.json +192 -0
  17. package/dungeons/food-delivery.js +602 -0
  18. package/dungeons/food-schema.json +1152 -0
  19. package/dungeons/food.js +173 -406
  20. package/dungeons/gaming-schema.json +1270 -0
  21. package/dungeons/gaming.js +182 -42
  22. package/dungeons/insurance-application-schema.json +204 -0
  23. package/dungeons/insurance-application.js +605 -0
  24. package/dungeons/media-schema.json +906 -0
  25. package/dungeons/media.js +250 -420
  26. package/dungeons/retention-cadence-schema.json +78 -0
  27. package/dungeons/retention-cadence.js +35 -1
  28. package/dungeons/rpg-schema.json +4526 -0
  29. package/dungeons/rpg.js +171 -429
  30. package/dungeons/sanity-schema.json +255 -0
  31. package/dungeons/sanity.js +21 -10
  32. package/dungeons/sass-schema.json +1291 -0
  33. package/dungeons/sass.js +241 -368
  34. package/dungeons/scd-schema.json +919 -0
  35. package/dungeons/scd.js +41 -13
  36. package/dungeons/simple-schema.json +608 -0
  37. package/dungeons/simple.js +52 -15
  38. package/dungeons/simplest-schema.json +1418 -0
  39. package/dungeons/simplest.js +392 -0
  40. package/dungeons/social-schema.json +1118 -0
  41. package/dungeons/social.js +150 -391
  42. package/dungeons/text-generation-schema.json +3096 -0
  43. package/dungeons/text-generation.js +71 -0
  44. package/index.js +8 -6
  45. package/lib/core/config-validator.js +28 -8
  46. package/lib/core/storage.js +5 -5
  47. package/lib/generators/events.js +4 -4
  48. package/lib/orchestrators/mixpanel-sender.js +16 -13
  49. package/lib/orchestrators/user-loop.js +14 -6
  50. package/lib/templates/soup-presets.js +188 -0
  51. package/lib/utils/utils.js +52 -6
  52. package/package.json +1 -1
  53. package/types.d.ts +20 -3
  54. package/dungeons/adspend.js +0 -130
  55. package/dungeons/anon.js +0 -128
  56. package/dungeons/benchmark-heavy.js +0 -240
  57. package/dungeons/benchmark-light.js +0 -140
  58. package/dungeons/big.js +0 -226
  59. package/dungeons/business.js +0 -391
  60. package/dungeons/complex.js +0 -428
  61. package/dungeons/experiments.js +0 -137
  62. package/dungeons/funnels.js +0 -309
  63. package/dungeons/mil.js +0 -323
  64. package/dungeons/mirror.js +0 -161
  65. package/dungeons/soup-test.js +0 -52
  66. package/dungeons/streaming.js +0 -372
  67. package/dungeons/strict-event-test.js +0 -30
  68. package/dungeons/student-teacher.js +0 -438
  69. package/dungeons/too-big-events.js +0 -203
  70. package/dungeons/user-agent.js +0 -209
@@ -12,6 +12,77 @@ const days = 92;
12
12
 
13
13
  /** @typedef {import("../types.js").Dungeon} Dungeon */
14
14
 
15
+ /*
16
+ * ============================================================================
17
+ * DATASET OVERVIEW
18
+ * ============================================================================
19
+ *
20
+ * App: Text Generation Demo — showcases DM4's organic text generation
21
+ * Scale: 8,000 users, ~960K events, 92 days
22
+ *
23
+ * A text-heavy SaaS analytics dungeon that exercises every text generation
24
+ * style available in DM4. Users interact through support tickets, product
25
+ * reviews, forum posts, search queries, chat messages, social media posts
26
+ * (Twitter, LinkedIn, Reddit), bug reports, feature requests, onboarding
27
+ * feedback, charity/wedding comments, and webinar chat.
28
+ *
29
+ * Each event type uses a dedicated createTextGenerator() configured with its
30
+ * own style, tone, formality, keyword banks, typo rates, and authenticity
31
+ * levels. Text generator styles used: support, review, forum, search,
32
+ * feedback, chat, email, tweet, comments (9 distinct styles).
33
+ *
34
+ * Events:
35
+ * social_media_tweet (15) > chat_message (10) > charity_comment_posted (8)
36
+ * > wedding_comment_posted (6) > company_announcement_tweet (3)
37
+ * > all others (1 each): support ticket, review, forum post, search,
38
+ * feedback, email, twitter, linkedin, reddit, bug report, feature
39
+ * request, onboarding, tutorial comment, webinar chat, api thread
40
+ * ============================================================================
41
+ */
42
+
43
+ /*
44
+ * ============================================================================
45
+ * ANALYTICS HOOKS
46
+ * ============================================================================
47
+ *
48
+ * Hook 1: Power User + Churn Risk Classification
49
+ * Type: user
50
+ * What: Users with engagement_score > 70 get is_power_user = true.
51
+ * Users inactive > 20 days get risk_level = "high_churn", else "healthy".
52
+ * Mixpanel report:
53
+ * - Insights > user profile breakdown by "is_power_user"
54
+ * - Insights > user profile breakdown by "risk_level", cross-reference
55
+ * with "user_tier" to see churn risk by plan
56
+ *
57
+ * Hook 2: Critical Ticket Auto-Escalation
58
+ * Type: event
59
+ * What: enterprise_support_ticket events with priority = "critical" get
60
+ * escalation_level bumped by 1 (max 3) and auto_escalated = true.
61
+ * Mixpanel report:
62
+ * - Insights > "enterprise_support_ticket" total events, breakdown by
63
+ * auto_escalated
64
+ * - Expect: critical tickets show auto_escalated = true
65
+ *
66
+ * Hook 3: Critical Bug Flagging
67
+ * Type: event
68
+ * What: bug_report_submitted events with severity = "critical" AND
69
+ * is_reproducible = true get requires_immediate_review = true and a
70
+ * random estimated_fix_hours (1-8).
71
+ * Mixpanel report:
72
+ * - Insights > "bug_report_submitted" total events, breakdown by
73
+ * requires_immediate_review
74
+ * - Expect: only critical + reproducible bugs are flagged
75
+ *
76
+ * Hook 4: Enterprise Satisfaction Survey Injection
77
+ * Type: everything
78
+ * What: Enterprise-tier users with > 5 events get a
79
+ * "satisfaction_survey_triggered" event appended with a 1-10 NPS score.
80
+ * Mixpanel report:
81
+ * - Insights > "satisfaction_survey_triggered" AVG(score), breakdown by
82
+ * product_tier
83
+ * - Expect: only enterprise users have survey events
84
+ * ============================================================================
85
+ */
15
86
 
16
87
  // Enterprise support ticket generator with keywords and high authenticity
17
88
  const enterpriseSupportGen = createTextGenerator({
package/index.js CHANGED
@@ -197,13 +197,16 @@ async function main(config) {
197
197
 
198
198
  // ! DATA GENERATION ENDS HERE
199
199
 
200
+ // Flush when writeToDisk is enabled OR batch mode activated (to capture tail data)
201
+ const shouldFlush = validatedConfig.writeToDisk || context.isBatchMode();
202
+
200
203
  // Step 10: Flush lookup tables to disk (always as CSVs)
201
- if (validatedConfig.writeToDisk) {
204
+ if (shouldFlush) {
202
205
  await flushLookupTablesToDisk(storage, validatedConfig);
203
206
  }
204
207
 
205
- // Step 11: Flush other storage containers to disk (if writeToDisk enabled)
206
- if (validatedConfig.writeToDisk) {
208
+ // Step 11: Flush other storage containers to disk
209
+ if (shouldFlush) {
207
210
  await flushStorageToDisk(storage, validatedConfig);
208
211
  }
209
212
 
@@ -276,7 +279,7 @@ async function generateGroupProfiles(context) {
276
279
  const groupContainer = storage.groupProfilesData[i];
277
280
 
278
281
  if (!groupContainer) {
279
- console.warn(`Warning: No storage container found for group key: ${groupKey}`);
282
+ if (config.verbose) console.warn(`Warning: No storage container found for group key: ${groupKey}`);
280
283
  continue;
281
284
  }
282
285
 
@@ -319,7 +322,7 @@ async function generateLookupTables(context) {
319
322
  const lookupContainer = storage.lookupTableData[i];
320
323
 
321
324
  if (!lookupContainer) {
322
- console.warn(`Warning: No storage container found for lookup table: ${key}`);
325
+ if (config.verbose) console.warn(`Warning: No storage container found for lookup table: ${key}`);
323
326
  continue;
324
327
  }
325
328
 
@@ -551,7 +554,6 @@ async function extractFileInfo(storage, config) {
551
554
  }
552
555
  } catch (error) {
553
556
  // If scanning fails, just return empty array
554
- console.warn('Warning: Could not scan data directory for files:', error.message);
555
557
  }
556
558
  }
557
559
 
@@ -11,6 +11,7 @@
11
11
  import dayjs from "dayjs";
12
12
  import { makeName } from "ak-tools";
13
13
  import * as u from "../utils/utils.js";
14
+ import { resolveSoup } from "../templates/soup-presets.js";
14
15
 
15
16
  /**
16
17
  * Infers funnels from the provided events
@@ -106,7 +107,7 @@ export function validateDungeonConfig(config) {
106
107
  token = null,
107
108
  region = "US",
108
109
  writeToDisk = false,
109
- verbose = true,
110
+ verbose = false,
110
111
  soup = {},
111
112
  hook = (record) => record,
112
113
  hasAdSpend = false,
@@ -136,6 +137,12 @@ export function validateDungeonConfig(config) {
136
137
  concurrency = 1;
137
138
  }
138
139
 
140
+ // Auto-enable batch mode for large datasets to prevent OOM
141
+ if (numEvents >= 2_000_000 && config.batchSize === undefined) {
142
+ batchSize = 1_000_000;
143
+ console.warn(`⚠️ Auto-enabling batch mode: numEvents (${numEvents.toLocaleString()}) >= 2M. Using batchSize of ${batchSize.toLocaleString()}.`);
144
+ }
145
+
139
146
  // Ensure defaults for deep objects
140
147
  if (!config.superProps) config.superProps = superProps;
141
148
  if (!config.userProps || Object.keys(config?.userProps || {})) config.userProps = userProps;
@@ -148,6 +155,17 @@ export function validateDungeonConfig(config) {
148
155
  throw new Error("Either epochStart or numDays must be provided");
149
156
  }
150
157
 
158
+ // Resolve soup presets (must happen after numDays is computed)
159
+ const resolved = resolveSoup(soup, numDays);
160
+ soup = resolved.soup;
161
+ // Apply suggested birth distribution params if not explicitly set by the dungeon
162
+ if (resolved.suggestedBornRecentBias !== undefined && config.bornRecentBias === undefined) {
163
+ config.bornRecentBias = resolved.suggestedBornRecentBias;
164
+ }
165
+ if (resolved.suggestedPercentUsersBornInDataset !== undefined && config.percentUsersBornInDataset === undefined) {
166
+ config.percentUsersBornInDataset = resolved.suggestedPercentUsersBornInDataset;
167
+ }
168
+
151
169
  // Use provided name if non-empty string, otherwise generate one
152
170
  if (!name || name === "") {
153
171
  name = makeName();
@@ -166,15 +184,17 @@ export function validateDungeonConfig(config) {
166
184
  throw new Error('Hook string did not evaluate to a function');
167
185
  }
168
186
  } catch (error) {
169
- console.warn(`\u26a0\ufe0f Failed to convert hook string to function: ${error.message}`);
170
- console.warn('Using default pass-through hook');
187
+ if (config.verbose !== false) {
188
+ console.warn(`\u26a0\ufe0f Failed to convert hook string to function: ${error.message}`);
189
+ console.warn('Using default pass-through hook');
190
+ }
171
191
  hook = (record) => record;
172
192
  }
173
193
  }
174
194
 
175
195
  // Ensure hook is a function
176
196
  if (typeof hook !== 'function') {
177
- console.warn('\u26a0\ufe0f Hook is not a function, using default pass-through hook');
197
+ if (config.verbose !== false) console.warn('\u26a0\ufe0f Hook is not a function, using default pass-through hook');
178
198
  hook = (record) => record;
179
199
  }
180
200
 
@@ -337,7 +357,7 @@ function transformSCDPropsWithoutCredentials(config) {
337
357
  }
338
358
 
339
359
  // UI job without credentials - convert SCD props to regular props
340
- console.log('\u26a0\ufe0f Service account credentials missing - converting SCD properties to static properties');
360
+ if (config.verbose !== false) console.log('\u26a0\ufe0f Service account credentials missing - converting SCD properties to static properties');
341
361
 
342
362
  // Ensure userProps and groupProps exist
343
363
  if (!config.userProps) config.userProps = {};
@@ -356,20 +376,20 @@ function transformSCDPropsWithoutCredentials(config) {
356
376
  if (type === "user") {
357
377
  // Add to userProps
358
378
  config.userProps[propKey] = values;
359
- console.log(` \u2713 Converted user SCD property: ${propKey}`);
379
+ if (config.verbose !== false) console.log(` \u2713 Converted user SCD property: ${propKey}`);
360
380
  } else {
361
381
  // Add to groupProps for the specific group type
362
382
  if (!config.groupProps[type]) {
363
383
  config.groupProps[type] = {};
364
384
  }
365
385
  config.groupProps[type][propKey] = values;
366
- console.log(` \u2713 Converted group SCD property: ${propKey} (${type})`);
386
+ if (config.verbose !== false) console.log(` \u2713 Converted group SCD property: ${propKey} (${type})`);
367
387
  }
368
388
  }
369
389
 
370
390
  // Clear out scdProps since we've converted everything
371
391
  config.scdProps = {};
372
- console.log('\u2713 SCD properties converted to static properties\n');
392
+ if (config.verbose !== false) console.log('\u2713 SCD properties converted to static properties\n');
373
393
  }
374
394
 
375
395
  export { inferFunnels, transformSCDPropsWithoutCredentials };
@@ -366,12 +366,12 @@ export class StorageManager {
366
366
  if (config.writeToDisk === false) {
367
367
  const batchSize = config.batchSize || 1_000_000;
368
368
  const numEvents = config.numEvents || 0;
369
-
369
+
370
370
  if (batchSize < numEvents) {
371
- throw new Error(
372
- `Configuration error: writeToDisk is explicitly set to false but batchSize (${batchSize}) is lower than numEvents (${numEvents}). ` +
373
- `This would result in data loss as batched data would be discarded. ` +
374
- `Either set writeToDisk to true, increase batchSize to be >= numEvents, or provide a Mixpanel token to send data directly.`
371
+ console.warn(
372
+ `⚠️ writeToDisk is false but batchSize (${batchSize.toLocaleString()}) < numEvents (${numEvents.toLocaleString()}). ` +
373
+ `Batch files will be written to disk temporarily to avoid OOM. ` +
374
+ `They will be cleaned up after Mixpanel import if a token is provided.`
375
375
  );
376
376
  }
377
377
  }
@@ -52,9 +52,9 @@ export async function makeEvent(
52
52
  const chance = u.getChance();
53
53
 
54
54
  // Extract soup configuration for time distribution
55
- // Dynamic peaks: one per week for long ranges, minimum 5
56
- const defaultPeaks = Math.max(5, Math.ceil((config.numDays || 30) / 7));
57
- const { mean = 0, deviation = 2, peaks = defaultPeaks } = config.soup || {};
55
+ // Dynamic peaks: enough to flatten DOW interference from chunk boundaries
56
+ const defaultPeaks = Math.max(5, (config.numDays || 30) * 2);
57
+ const { mean = 0, deviation = 2, peaks = defaultPeaks, dayOfWeekWeights, hourOfDayWeights } = /** @type {import('../../types').SoupConfig} */ (config.soup) || {};
58
58
 
59
59
  // Extract feature flags from config
60
60
  const {
@@ -102,7 +102,7 @@ export async function makeEvent(
102
102
  shiftedTimestamp = earliestTime + context.TIME_SHIFT_SECONDS;
103
103
  } else {
104
104
  // TimeSoup returns unix seconds; shift and convert to ISO once
105
- const soupTimestamp = u.TimeSoup(earliestTime, context.FIXED_NOW, peaks, deviation, mean);
105
+ const soupTimestamp = u.TimeSoup(earliestTime, context.FIXED_NOW, peaks, deviation, mean, dayOfWeekWeights, hourOfDayWeights, context.TIME_SHIFT_SECONDS);
106
106
  shiftedTimestamp = soupTimestamp + context.TIME_SHIFT_SECONDS;
107
107
  }
108
108
  // Drop events that would land in the future (Mixpanel rewrites these to "now", causing pile-ups)
@@ -6,7 +6,6 @@
6
6
  /** @typedef {import('../../types').Context} Context */
7
7
 
8
8
  import dayjs from "dayjs";
9
- import path from "path";
10
9
  import { comma, ls, rm } from "ak-tools";
11
10
  import * as u from "../utils/utils.js";
12
11
  import mp from "mixpanel-import";
@@ -22,8 +21,6 @@ export async function sendToMixpanel(context) {
22
21
  adSpendData,
23
22
  eventData,
24
23
  groupProfilesData,
25
- lookupTableData,
26
- mirrorEventData,
27
24
  scdTableData,
28
25
  userProfilesData,
29
26
  groupEventData
@@ -41,7 +38,7 @@ export async function sendToMixpanel(context) {
41
38
 
42
39
  const importResults = { events: {}, users: {}, groups: [] };
43
40
  const isBATCH_MODE = context.isBatchMode();
44
- const NODE_ENV = process.env.NODE_ENV || "unknown";
41
+ _verbose = config.verbose !== false;
45
42
 
46
43
  /** @type {import('mixpanel-import').Creds} */
47
44
  const creds = { token };
@@ -236,15 +233,20 @@ export async function sendToMixpanel(context) {
236
233
  if (!writeToDisk && isBATCH_MODE) {
237
234
  const writeDir = eventData?.getWriteDir?.() || userProfilesData?.getWriteDir?.();
238
235
  if (writeDir) {
236
+ const configName = context.config.name;
239
237
  const listDir = await ls(writeDir);
240
238
  // @ts-ignore
241
- const files = listDir.filter(f =>
242
- f.includes('-EVENTS') ||
243
- f.includes('-USERS') ||
244
- f.includes('-ADSPEND') ||
245
- f.includes('-GROUPS') ||
246
- f.includes('-GROUP-EVENTS')
247
- );
239
+ const files = listDir.filter(f => {
240
+ if (configName && !f.includes(configName)) return false;
241
+ return f.includes('-EVENTS') ||
242
+ f.includes('-USERS') ||
243
+ f.includes('-ADSPEND') ||
244
+ f.includes('-GROUPS') ||
245
+ f.includes('-GROUP-EVENTS') ||
246
+ f.includes('-SCD') ||
247
+ f.includes('-MIRROR') ||
248
+ f.includes('-LOOKUP');
249
+ });
248
250
  for (const file of files) {
249
251
  await rm(file);
250
252
  }
@@ -255,9 +257,10 @@ export async function sendToMixpanel(context) {
255
257
  }
256
258
 
257
259
  /**
258
- * Simple logging function
260
+ * Logging function that respects verbose config
259
261
  * @param {string} message - Message to log
260
262
  */
263
+ let _verbose = true;
261
264
  function log(message) {
262
- console.log(message);
265
+ if (_verbose) console.log(message);
263
266
  }
@@ -59,6 +59,7 @@ export async function userLoop(context) {
59
59
  let cancelled = false;
60
60
  const onSigint = () => {
61
61
  cancelled = true;
62
+ USER_CONN.clearQueue();
62
63
  if (verbose) console.log(`\n\nStopping generation (Ctrl+C)...\n`);
63
64
  };
64
65
  process.on('SIGINT', onSigint);
@@ -109,7 +110,7 @@ export async function userLoop(context) {
109
110
  if (userIsBornInDataset) {
110
111
  let biasedCreated = dayjs(created).subtract(daysShift, 'd');
111
112
 
112
- if (bornRecentBias > 0) {
113
+ if (bornRecentBias !== 0) {
113
114
  // Calculate how far into the dataset this user was born (0 = start, 1 = end/recent)
114
115
  const datasetStart = dayjs.unix(global.FIXED_BEGIN);
115
116
  const datasetEnd = dayjs.unix(context.FIXED_NOW);
@@ -117,10 +118,17 @@ export async function userLoop(context) {
117
118
  // Clamp userPosition to [0, 1] to handle edge cases from rounding in time calculations
118
119
  const userPosition = Math.max(0, Math.min(1, biasedCreated.diff(datasetStart) / totalDuration));
119
120
 
120
- // Apply power function to bias toward recent (higher values)
121
- // exponent < 1 shifts distribution toward 1 (recent)
122
- const exponent = 1 - (bornRecentBias * 0.7); // 0.3 bias -> 0.79 exponent (gentle nudge)
123
- const biasedPosition = Math.pow(userPosition, exponent);
121
+ let biasedPosition;
122
+ if (bornRecentBias > 0) {
123
+ // Positive bias: exponent < 1 shifts distribution toward 1 (recent)
124
+ const exponent = 1 - (bornRecentBias * 0.7); // 0.3 bias -> 0.79 exponent (gentle nudge)
125
+ biasedPosition = Math.pow(userPosition, exponent);
126
+ } else {
127
+ // Negative bias: mirror the power function to shift toward 0 (early)
128
+ // -0.3 bias -> 0.79 exponent applied to (1 - position), then mirrored back
129
+ const exponent = 1 - (Math.abs(bornRecentBias) * 0.7);
130
+ biasedPosition = 1 - Math.pow(1 - userPosition, exponent);
131
+ }
124
132
 
125
133
  // Convert back to timestamp
126
134
  biasedCreated = datasetStart.add(biasedPosition * totalDuration, 'millisecond');
@@ -233,7 +241,7 @@ export async function userLoop(context) {
233
241
 
234
242
  // ALL SUBSEQUENT EVENTS (funnels for converted users, standalone for all)
235
243
  let userChurned = false;
236
- while (numEventsPreformed < numEventsThisUserWillPreform) {
244
+ while (numEventsPreformed < numEventsThisUserWillPreform && !cancelled) {
237
245
  let newEvents;
238
246
  if (usageFunnels.length && userConverted) {
239
247
  const currentFunnel = chance.pickone(usageFunnels);
@@ -0,0 +1,188 @@
/**
 * TimeSoup preset configurations.
 * Each preset defines time distribution parameters that produce distinct patterns.
 *
 * Parameters:
 * - peaks(numDays): function returning number of Gaussian clusters
 * - deviation: controls peak width (higher = tighter)
 * - mean: offset from chunk center (0 = centered)
 * - dayOfWeekWeights: 7-element array [Sun..Sat], max=1.0, null to disable
 * - hourOfDayWeights: 24-element array [0h..23h UTC], max=1.0, null to disable
 *
 * Some presets also suggest bornRecentBias and percentUsersBornInDataset,
 * but those are top-level dungeon config — presets only set them if not already specified.
 */

// Real-world Mixpanel DOW pattern: weekday-heavy, Saturday valley
export const REAL_DOW = [0.637, 1.0, 0.999, 0.998, 0.966, 0.802, 0.528];

// Real-world Mixpanel HOD pattern: early-morning peak (UTC), afternoon valley
export const REAL_HOD = [
  0.949, 0.992, 0.998, 0.946, 0.895, 0.938, 1.0, 0.997,
  0.938, 0.894, 0.827, 0.786, 0.726, 0.699, 0.688, 0.643,
  0.584, 0.574, 0.554, 0.576, 0.604, 0.655, 0.722, 0.816,
];

// Flat weights (no cyclical pattern)
export const FLAT_DOW = [1, 1, 1, 1, 1, 1, 1];
export const FLAT_HOD = Array.from({ length: 24 }, () => 1);

/** @type {Record<string, {peaks: (numDays: number) => number, deviation: number, mean: number, dayOfWeekWeights: number[]|null, hourOfDayWeights: number[]|null, bornRecentBias?: number, percentUsersBornInDataset?: number}>} */
export const SOUP_PRESETS = {
  /**
   * steady — Mature SaaS / Stable Product
   * Nearly flat day-over-day, slight weekly pattern, minimal growth trend.
   */
  steady: {
    peaks: (numDays) => Math.max(5, numDays * 2),
    deviation: 1.5,
    mean: 0,
    dayOfWeekWeights: REAL_DOW,
    hourOfDayWeights: REAL_HOD,
    bornRecentBias: 0.1,
    percentUsersBornInDataset: 10,
  },

  /**
   * growth — Growing Startup (DEFAULT)
   * Gradual uptrend with visible weekly peaks. This is the default behavior.
   */
  growth: {
    peaks: (numDays) => Math.max(5, numDays * 2),
    deviation: 2,
    mean: 0,
    dayOfWeekWeights: REAL_DOW,
    hourOfDayWeights: REAL_HOD,
    bornRecentBias: 0.3,
    percentUsersBornInDataset: 15,
  },

  /**
   * spiky — Event-Driven / Bursty
   * Clear peaks and valleys, dramatic variation. Fewer Gaussian clusters + tight deviation.
   */
  spiky: {
    peaks: (numDays) => Math.max(5, Math.ceil(numDays / 10)),
    deviation: 3.5,
    mean: 0,
    dayOfWeekWeights: REAL_DOW,
    hourOfDayWeights: REAL_HOD,
    bornRecentBias: 0.3,
    percentUsersBornInDataset: 20,
  },

  /**
   * seasonal — Strong Cyclical Patterns
   * 3-4 major waves across the dataset. Very few peaks create dramatic macro trends.
   */
  seasonal: {
    peaks: () => 4,
    deviation: 2.5,
    mean: 0,
    dayOfWeekWeights: REAL_DOW,
    hourOfDayWeights: REAL_HOD,
    bornRecentBias: 0.2,
    percentUsersBornInDataset: 25,
  },

  /**
   * global — Distributed Users Across Timezones
   * Very flat hourly + daily distribution. No cyclical patterns.
   */
  global: {
    peaks: (numDays) => Math.max(5, numDays * 2),
    deviation: 1,
    mean: 0,
    dayOfWeekWeights: FLAT_DOW,
    hourOfDayWeights: FLAT_HOD,
    bornRecentBias: 0,
    percentUsersBornInDataset: 10,
  },

  /**
   * churny — High Churn / Declining Product
   * Flat distribution (no growth trend). All users pre-exist the dataset,
   * so there's no acceleration. Combine with an "everything" hook that
   * filters late events to create a true declining shape.
   */
  churny: {
    peaks: (numDays) => Math.max(5, numDays * 2),
    deviation: 2,
    mean: 0,
    dayOfWeekWeights: REAL_DOW,
    hourOfDayWeights: REAL_HOD,
    bornRecentBias: 0,
    percentUsersBornInDataset: 5,
  },

  /**
   * chaotic — Unpredictable / Irregular Patterns
   * Few peaks + very tight clustering = dramatic bursts separated by quiet stretches.
   */
  chaotic: {
    peaks: (numDays) => Math.max(3, Math.ceil(numDays / 20)),
    deviation: 4,
    mean: 0,
    dayOfWeekWeights: REAL_DOW,
    hourOfDayWeights: REAL_HOD,
    bornRecentBias: 0.5,
    percentUsersBornInDataset: 40,
  },
};

/** @type {string[]} */
export const PRESET_NAMES = Object.keys(SOUP_PRESETS);

/**
 * Looks up a preset by name and expands it for a dataset of `numDays` days.
 *
 * Only the presets declared directly on SOUP_PRESETS are accepted. A plain
 * `SOUP_PRESETS[name]` lookup would also resolve inherited keys such as
 * "constructor" or "toString", slip past the unknown-preset guard, and fail
 * later with an unrelated TypeError.
 *
 * @param {string} name - Preset name supplied by the dungeon
 * @param {number} numDays - Number of days in the dataset
 * @returns {{ base: object, suggestedBornRecentBias?: number, suggestedPercentUsersBornInDataset?: number }}
 * @throws {Error} When `name` is not one of PRESET_NAMES
 */
function expandPreset(name, numDays) {
  if (!Object.prototype.hasOwnProperty.call(SOUP_PRESETS, name)) {
    throw new Error(`Unknown soup preset: "${name}". Valid presets: ${PRESET_NAMES.join(', ')}`);
  }
  const preset = SOUP_PRESETS[name];
  return {
    base: {
      peaks: preset.peaks(numDays),
      deviation: preset.deviation,
      mean: preset.mean,
      dayOfWeekWeights: preset.dayOfWeekWeights,
      hourOfDayWeights: preset.hourOfDayWeights,
    },
    suggestedBornRecentBias: preset.bornRecentBias,
    suggestedPercentUsersBornInDataset: preset.percentUsersBornInDataset,
  };
}

/**
 * Resolves a soup config — handles string presets, preset+overrides, and raw objects.
 *
 * - Falsy input resolves to an empty soup object.
 * - A string is treated as a preset name.
 * - An object with a truthy `preset` key starts from that preset and applies
 *   every other key of the object as an override.
 * - Anything else is returned unchanged (same reference) as the soup.
 *
 * @param {string | object} soup - Soup config from dungeon
 * @param {number} numDays - Number of days in the dataset
 * @returns {{ soup: object, suggestedBornRecentBias?: number, suggestedPercentUsersBornInDataset?: number }}
 * @throws {Error} When a preset name is given that is not one of PRESET_NAMES
 */
export function resolveSoup(soup, numDays) {
  if (!soup) return { soup: {} };

  // String preset: "growth", "spiky", etc.
  if (typeof soup === 'string') {
    const { base, ...suggestions } = expandPreset(soup, numDays);
    return { soup: base, ...suggestions };
  }

  // Object with preset key: { preset: "growth", deviation: 3 }
  if (typeof soup === 'object' && soup.preset) {
    // Everything except the 'preset' key itself is an override
    const { preset: presetName, ...overrides } = soup;
    const { base, ...suggestions } = expandPreset(presetName, numDays);
    return { soup: { ...base, ...overrides }, ...suggestions };
  }

  // Raw object: { peaks: 10, deviation: 2 } — pass through unchanged
  return { soup };
}
@@ -1180,7 +1180,17 @@ let soupHits = 0;
1180
1180
  * Divides the range into `peaks` chunks, picks one randomly, then samples within it.
1181
1181
  * Returns unix seconds (not ISO string) for performance — caller converts once.
1182
1182
  */
1183
- function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0) {
1183
+ // Default day-of-week weights (0=Sun, 1=Mon, ..., 6=Sat) derived from real Mixpanel data
1184
+ const DEFAULT_DOW_WEIGHTS = [0.637, 1.0, 0.999, 0.998, 0.966, 0.802, 0.528];
1185
+
1186
+ // Default hour-of-day weights (0=midnight, ..., 23=11pm UTC) — derived from real Mixpanel data
1187
+ const DEFAULT_HOD_WEIGHTS = [
1188
+ 0.949, 0.992, 0.998, 0.946, 0.895, 0.938, 1.0, 0.997,
1189
+ 0.938, 0.894, 0.827, 0.786, 0.726, 0.699, 0.688, 0.643,
1190
+ 0.584, 0.574, 0.554, 0.576, 0.604, 0.655, 0.722, 0.816
1191
+ ];
1192
+
1193
+ function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0, dayOfWeekWeights = DEFAULT_DOW_WEIGHTS, hourOfDayWeights = DEFAULT_HOD_WEIGHTS, timeShiftSeconds = 0) {
1184
1194
  if (!earliestTime) earliestTime = global.FIXED_BEGIN ? global.FIXED_BEGIN : dayjs().subtract(30, 'd').unix();
1185
1195
  if (!latestTime) latestTime = global.FIXED_NOW ? global.FIXED_NOW : dayjs().unix();
1186
1196
  const chance = getChance();
@@ -1193,21 +1203,57 @@ function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0)
1193
1203
  }
1194
1204
  const chunkSize = totalRange / peaks;
1195
1205
 
1196
- // Select a random chunk based on the number of peaks
1206
+ // Phase 1: Gaussian chunk sampling (macro trend across the time range)
1197
1207
  const peakIndex = integer(0, peaks - 1);
1198
1208
  const chunkStart = earliestTime + peakIndex * chunkSize;
1199
1209
  const chunkEnd = chunkStart + chunkSize;
1200
1210
  const chunkMid = (chunkStart + chunkEnd) / 2;
1201
-
1202
- // Generate offset from normal distribution, clamp to chunk boundaries
1203
1211
  const maxDeviation = chunkSize / deviation;
1204
1212
  const offset = chance.normal({ mean: mean, dev: maxDeviation });
1205
1213
  const proposedTime = chunkMid + offset;
1206
1214
  const clampedTime = Math.max(chunkStart, Math.min(chunkEnd, proposedTime));
1207
- const finalTime = Math.max(earliestTime, Math.min(latestTime, clampedTime));
1215
+ let candidate = Math.max(earliestTime, Math.min(latestTime, clampedTime));
1216
+
1217
+ // Phase 2: DOW accept/reject — retry if day-of-week doesn't pass weight check
1218
+ if (dayOfWeekWeights) {
1219
+ for (let attempt = 0; attempt < 50; attempt++) {
1220
+ const dow = new Date((candidate + timeShiftSeconds) * 1000).getUTCDay();
1221
+ if (chance.random() < dayOfWeekWeights[dow]) break;
1222
+ // Rejected — resample from Gaussian chunks
1223
+ const pi = integer(0, peaks - 1);
1224
+ const cs = earliestTime + pi * chunkSize;
1225
+ const ce = cs + chunkSize;
1226
+ const cm = (cs + ce) / 2;
1227
+ const md = chunkSize / deviation;
1228
+ const off = chance.normal({ mean: mean, dev: md });
1229
+ const pt = cm + off;
1230
+ candidate = Math.max(earliestTime, Math.min(latestTime, Math.max(cs, Math.min(ce, pt))));
1231
+ }
1232
+ }
1233
+
1234
+ // Phase 3: Redistribute hour-of-day (changes only hour within same day)
1235
+ if (hourOfDayWeights) {
1236
+ const shifted = candidate + timeShiftSeconds;
1237
+ const d = new Date(shifted * 1000);
1238
+ const currentMinute = d.getUTCMinutes();
1239
+ const currentSecond = d.getUTCSeconds();
1240
+
1241
+ const totalHodWeight = hourOfDayWeights.reduce((s, w) => s + w, 0);
1242
+ let roll = chance.random() * totalHodWeight;
1243
+ let newHour = 0;
1244
+ for (let h = 0; h < 24; h++) {
1245
+ roll -= hourOfDayWeights[h];
1246
+ if (roll <= 0) { newHour = h; break; }
1247
+ }
1248
+
1249
+ const dayStartShifted = Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate()) / 1000;
1250
+ const newShifted = dayStartShifted + newHour * 3600 + currentMinute * 60 + currentSecond;
1251
+ candidate = newShifted - timeShiftSeconds;
1252
+ candidate = Math.max(earliestTime, Math.min(latestTime, candidate));
1253
+ }
1208
1254
 
1209
1255
  soupHits++;
1210
- return finalTime;
1256
+ return candidate;
1211
1257
  }
1212
1258
 
1213
1259
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "make-mp-data",
3
- "version": "3.0.3",
3
+ "version": "3.0.5",
4
4
  "description": "builds all mixpanel primitives for a given project",
5
5
  "type": "module",
6
6
  "main": "index.js",