make-mp-data 3.0.3 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -0
- package/dungeons/array-of-object-lookup-schema.json +327 -0
- package/dungeons/array-of-object-lookup.js +29 -9
- package/dungeons/capstone/capstone-ic3.js +291 -0
- package/dungeons/capstone/capstone-ic4.js +598 -0
- package/dungeons/capstone/capstone-ic5.js +668 -0
- package/dungeons/capstone/generate-product-lookup.js +309 -0
- package/dungeons/ecommerce-schema.json +462 -0
- package/dungeons/{copilot.js → ecommerce.js} +79 -17
- package/dungeons/education-schema.json +2409 -0
- package/dungeons/education.js +226 -462
- package/dungeons/fintech-schema.json +14034 -0
- package/dungeons/fintech.js +134 -413
- package/dungeons/foobar-schema.json +403 -0
- package/dungeons/foobar.js +27 -4
- package/dungeons/food-delivery-schema.json +192 -0
- package/dungeons/food-delivery.js +602 -0
- package/dungeons/food-schema.json +1152 -0
- package/dungeons/food.js +173 -406
- package/dungeons/gaming-schema.json +1270 -0
- package/dungeons/gaming.js +182 -42
- package/dungeons/insurance-application-schema.json +204 -0
- package/dungeons/insurance-application.js +605 -0
- package/dungeons/media-schema.json +906 -0
- package/dungeons/media.js +250 -420
- package/dungeons/retention-cadence-schema.json +78 -0
- package/dungeons/retention-cadence.js +35 -1
- package/dungeons/rpg-schema.json +4526 -0
- package/dungeons/rpg.js +171 -429
- package/dungeons/sanity-schema.json +255 -0
- package/dungeons/sanity.js +21 -10
- package/dungeons/sass-schema.json +1291 -0
- package/dungeons/sass.js +241 -368
- package/dungeons/scd-schema.json +919 -0
- package/dungeons/scd.js +41 -13
- package/dungeons/simple-schema.json +608 -0
- package/dungeons/simple.js +52 -15
- package/dungeons/simplest-schema.json +1418 -0
- package/dungeons/simplest.js +392 -0
- package/dungeons/social-schema.json +1118 -0
- package/dungeons/social.js +150 -391
- package/dungeons/text-generation-schema.json +3096 -0
- package/dungeons/text-generation.js +71 -0
- package/index.js +8 -6
- package/lib/core/config-validator.js +28 -8
- package/lib/core/storage.js +5 -5
- package/lib/generators/events.js +4 -4
- package/lib/orchestrators/mixpanel-sender.js +16 -13
- package/lib/orchestrators/user-loop.js +14 -6
- package/lib/templates/soup-presets.js +188 -0
- package/lib/utils/utils.js +52 -6
- package/package.json +1 -1
- package/types.d.ts +20 -3
- package/dungeons/adspend.js +0 -130
- package/dungeons/anon.js +0 -128
- package/dungeons/benchmark-heavy.js +0 -240
- package/dungeons/benchmark-light.js +0 -140
- package/dungeons/big.js +0 -226
- package/dungeons/business.js +0 -391
- package/dungeons/complex.js +0 -428
- package/dungeons/experiments.js +0 -137
- package/dungeons/funnels.js +0 -309
- package/dungeons/mil.js +0 -323
- package/dungeons/mirror.js +0 -161
- package/dungeons/soup-test.js +0 -52
- package/dungeons/streaming.js +0 -372
- package/dungeons/strict-event-test.js +0 -30
- package/dungeons/student-teacher.js +0 -438
- package/dungeons/too-big-events.js +0 -203
- package/dungeons/user-agent.js +0 -209
|
@@ -12,6 +12,77 @@ const days = 92;
|
|
|
12
12
|
|
|
13
13
|
/** @typedef {import("../types.js").Dungeon} Dungeon */
|
|
14
14
|
|
|
15
|
+
/*
|
|
16
|
+
* ============================================================================
|
|
17
|
+
* DATASET OVERVIEW
|
|
18
|
+
* ============================================================================
|
|
19
|
+
*
|
|
20
|
+
* App: Text Generation Demo — showcases DM4's organic text generation
|
|
21
|
+
* Scale: 8,000 users, ~960K events, 92 days
|
|
22
|
+
*
|
|
23
|
+
* A text-heavy SaaS analytics dungeon that exercises every text generation
|
|
24
|
+
* style available in DM4. Users interact through support tickets, product
|
|
25
|
+
* reviews, forum posts, search queries, chat messages, social media posts
|
|
26
|
+
* (Twitter, LinkedIn, Reddit), bug reports, feature requests, onboarding
|
|
27
|
+
* feedback, charity/wedding comments, and webinar chat.
|
|
28
|
+
*
|
|
29
|
+
* Each event type uses a dedicated createTextGenerator() configured with its
|
|
30
|
+
* own style, tone, formality, keyword banks, typo rates, and authenticity
|
|
31
|
+
* levels. Text generator styles used: support, review, forum, search,
|
|
32
|
+
* feedback, chat, email, tweet, comments (9 distinct styles).
|
|
33
|
+
*
|
|
34
|
+
* Events:
|
|
35
|
+
* social_media_tweet (15) > chat_message (10) > charity_comment_posted (8)
|
|
36
|
+
* > wedding_comment_posted (6) > company_announcement_tweet (3)
|
|
37
|
+
* > all others (1 each): support ticket, review, forum post, search,
|
|
38
|
+
* feedback, email, twitter, linkedin, reddit, bug report, feature
|
|
39
|
+
* request, onboarding, tutorial comment, webinar chat, api thread
|
|
40
|
+
* ============================================================================
|
|
41
|
+
*/
|
|
42
|
+
|
|
43
|
+
/*
|
|
44
|
+
* ============================================================================
|
|
45
|
+
* ANALYTICS HOOKS
|
|
46
|
+
* ============================================================================
|
|
47
|
+
*
|
|
48
|
+
* Hook 1: Power User + Churn Risk Classification
|
|
49
|
+
* Type: user
|
|
50
|
+
* What: Users with engagement_score > 70 get is_power_user = true.
|
|
51
|
+
* Users inactive > 20 days get risk_level = "high_churn", else "healthy".
|
|
52
|
+
* Mixpanel report:
|
|
53
|
+
* - Insights > user profile breakdown by "is_power_user"
|
|
54
|
+
* - Insights > user profile breakdown by "risk_level", cross-reference
|
|
55
|
+
* with "user_tier" to see churn risk by plan
|
|
56
|
+
*
|
|
57
|
+
* Hook 2: Critical Ticket Auto-Escalation
|
|
58
|
+
* Type: event
|
|
59
|
+
* What: enterprise_support_ticket events with priority = "critical" get
|
|
60
|
+
* escalation_level bumped by 1 (max 3) and auto_escalated = true.
|
|
61
|
+
* Mixpanel report:
|
|
62
|
+
* - Insights > "enterprise_support_ticket" total events, breakdown by
|
|
63
|
+
* auto_escalated
|
|
64
|
+
* - Expect: critical tickets show auto_escalated = true
|
|
65
|
+
*
|
|
66
|
+
* Hook 3: Critical Bug Flagging
|
|
67
|
+
* Type: event
|
|
68
|
+
* What: bug_report_submitted events with severity = "critical" AND
|
|
69
|
+
* is_reproducible = true get requires_immediate_review = true and a
|
|
70
|
+
* random estimated_fix_hours (1-8).
|
|
71
|
+
* Mixpanel report:
|
|
72
|
+
* - Insights > "bug_report_submitted" total events, breakdown by
|
|
73
|
+
* requires_immediate_review
|
|
74
|
+
* - Expect: only critical + reproducible bugs are flagged
|
|
75
|
+
*
|
|
76
|
+
* Hook 4: Enterprise Satisfaction Survey Injection
|
|
77
|
+
* Type: everything
|
|
78
|
+
* What: Enterprise-tier users with > 5 events get a
|
|
79
|
+
* "satisfaction_survey_triggered" event appended with a 1-10 NPS score.
|
|
80
|
+
* Mixpanel report:
|
|
81
|
+
* - Insights > "satisfaction_survey_triggered" AVG(score), breakdown by
|
|
82
|
+
* product_tier
|
|
83
|
+
* - Expect: only enterprise users have survey events
|
|
84
|
+
* ============================================================================
|
|
85
|
+
*/
|
|
15
86
|
|
|
16
87
|
// Enterprise support ticket generator with keywords and high authenticity
|
|
17
88
|
const enterpriseSupportGen = createTextGenerator({
|
package/index.js
CHANGED
|
@@ -197,13 +197,16 @@ async function main(config) {
|
|
|
197
197
|
|
|
198
198
|
// ! DATA GENERATION ENDS HERE
|
|
199
199
|
|
|
200
|
+
// Flush when writeToDisk is enabled OR batch mode activated (to capture tail data)
|
|
201
|
+
const shouldFlush = validatedConfig.writeToDisk || context.isBatchMode();
|
|
202
|
+
|
|
200
203
|
// Step 10: Flush lookup tables to disk (always as CSVs)
|
|
201
|
-
if (
|
|
204
|
+
if (shouldFlush) {
|
|
202
205
|
await flushLookupTablesToDisk(storage, validatedConfig);
|
|
203
206
|
}
|
|
204
207
|
|
|
205
|
-
// Step 11: Flush other storage containers to disk
|
|
206
|
-
if (
|
|
208
|
+
// Step 11: Flush other storage containers to disk
|
|
209
|
+
if (shouldFlush) {
|
|
207
210
|
await flushStorageToDisk(storage, validatedConfig);
|
|
208
211
|
}
|
|
209
212
|
|
|
@@ -276,7 +279,7 @@ async function generateGroupProfiles(context) {
|
|
|
276
279
|
const groupContainer = storage.groupProfilesData[i];
|
|
277
280
|
|
|
278
281
|
if (!groupContainer) {
|
|
279
|
-
console.warn(`Warning: No storage container found for group key: ${groupKey}`);
|
|
282
|
+
if (config.verbose) console.warn(`Warning: No storage container found for group key: ${groupKey}`);
|
|
280
283
|
continue;
|
|
281
284
|
}
|
|
282
285
|
|
|
@@ -319,7 +322,7 @@ async function generateLookupTables(context) {
|
|
|
319
322
|
const lookupContainer = storage.lookupTableData[i];
|
|
320
323
|
|
|
321
324
|
if (!lookupContainer) {
|
|
322
|
-
console.warn(`Warning: No storage container found for lookup table: ${key}`);
|
|
325
|
+
if (config.verbose) console.warn(`Warning: No storage container found for lookup table: ${key}`);
|
|
323
326
|
continue;
|
|
324
327
|
}
|
|
325
328
|
|
|
@@ -551,7 +554,6 @@ async function extractFileInfo(storage, config) {
|
|
|
551
554
|
}
|
|
552
555
|
} catch (error) {
|
|
553
556
|
// If scanning fails, just return empty array
|
|
554
|
-
console.warn('Warning: Could not scan data directory for files:', error.message);
|
|
555
557
|
}
|
|
556
558
|
}
|
|
557
559
|
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
import dayjs from "dayjs";
|
|
12
12
|
import { makeName } from "ak-tools";
|
|
13
13
|
import * as u from "../utils/utils.js";
|
|
14
|
+
import { resolveSoup } from "../templates/soup-presets.js";
|
|
14
15
|
|
|
15
16
|
/**
|
|
16
17
|
* Infers funnels from the provided events
|
|
@@ -106,7 +107,7 @@ export function validateDungeonConfig(config) {
|
|
|
106
107
|
token = null,
|
|
107
108
|
region = "US",
|
|
108
109
|
writeToDisk = false,
|
|
109
|
-
verbose =
|
|
110
|
+
verbose = false,
|
|
110
111
|
soup = {},
|
|
111
112
|
hook = (record) => record,
|
|
112
113
|
hasAdSpend = false,
|
|
@@ -136,6 +137,12 @@ export function validateDungeonConfig(config) {
|
|
|
136
137
|
concurrency = 1;
|
|
137
138
|
}
|
|
138
139
|
|
|
140
|
+
// Auto-enable batch mode for large datasets to prevent OOM
|
|
141
|
+
if (numEvents >= 2_000_000 && config.batchSize === undefined) {
|
|
142
|
+
batchSize = 1_000_000;
|
|
143
|
+
console.warn(`⚠️ Auto-enabling batch mode: numEvents (${numEvents.toLocaleString()}) >= 2M. Using batchSize of ${batchSize.toLocaleString()}.`);
|
|
144
|
+
}
|
|
145
|
+
|
|
139
146
|
// Ensure defaults for deep objects
|
|
140
147
|
if (!config.superProps) config.superProps = superProps;
|
|
141
148
|
if (!config.userProps || Object.keys(config?.userProps || {})) config.userProps = userProps;
|
|
@@ -148,6 +155,17 @@ export function validateDungeonConfig(config) {
|
|
|
148
155
|
throw new Error("Either epochStart or numDays must be provided");
|
|
149
156
|
}
|
|
150
157
|
|
|
158
|
+
// Resolve soup presets (must happen after numDays is computed)
|
|
159
|
+
const resolved = resolveSoup(soup, numDays);
|
|
160
|
+
soup = resolved.soup;
|
|
161
|
+
// Apply suggested birth distribution params if not explicitly set by the dungeon
|
|
162
|
+
if (resolved.suggestedBornRecentBias !== undefined && config.bornRecentBias === undefined) {
|
|
163
|
+
config.bornRecentBias = resolved.suggestedBornRecentBias;
|
|
164
|
+
}
|
|
165
|
+
if (resolved.suggestedPercentUsersBornInDataset !== undefined && config.percentUsersBornInDataset === undefined) {
|
|
166
|
+
config.percentUsersBornInDataset = resolved.suggestedPercentUsersBornInDataset;
|
|
167
|
+
}
|
|
168
|
+
|
|
151
169
|
// Use provided name if non-empty string, otherwise generate one
|
|
152
170
|
if (!name || name === "") {
|
|
153
171
|
name = makeName();
|
|
@@ -166,15 +184,17 @@ export function validateDungeonConfig(config) {
|
|
|
166
184
|
throw new Error('Hook string did not evaluate to a function');
|
|
167
185
|
}
|
|
168
186
|
} catch (error) {
|
|
169
|
-
|
|
170
|
-
|
|
187
|
+
if (config.verbose !== false) {
|
|
188
|
+
console.warn(`\u26a0\ufe0f Failed to convert hook string to function: ${error.message}`);
|
|
189
|
+
console.warn('Using default pass-through hook');
|
|
190
|
+
}
|
|
171
191
|
hook = (record) => record;
|
|
172
192
|
}
|
|
173
193
|
}
|
|
174
194
|
|
|
175
195
|
// Ensure hook is a function
|
|
176
196
|
if (typeof hook !== 'function') {
|
|
177
|
-
console.warn('\u26a0\ufe0f Hook is not a function, using default pass-through hook');
|
|
197
|
+
if (config.verbose !== false) console.warn('\u26a0\ufe0f Hook is not a function, using default pass-through hook');
|
|
178
198
|
hook = (record) => record;
|
|
179
199
|
}
|
|
180
200
|
|
|
@@ -337,7 +357,7 @@ function transformSCDPropsWithoutCredentials(config) {
|
|
|
337
357
|
}
|
|
338
358
|
|
|
339
359
|
// UI job without credentials - convert SCD props to regular props
|
|
340
|
-
console.log('\u26a0\ufe0f Service account credentials missing - converting SCD properties to static properties');
|
|
360
|
+
if (config.verbose !== false) console.log('\u26a0\ufe0f Service account credentials missing - converting SCD properties to static properties');
|
|
341
361
|
|
|
342
362
|
// Ensure userProps and groupProps exist
|
|
343
363
|
if (!config.userProps) config.userProps = {};
|
|
@@ -356,20 +376,20 @@ function transformSCDPropsWithoutCredentials(config) {
|
|
|
356
376
|
if (type === "user") {
|
|
357
377
|
// Add to userProps
|
|
358
378
|
config.userProps[propKey] = values;
|
|
359
|
-
console.log(` \u2713 Converted user SCD property: ${propKey}`);
|
|
379
|
+
if (config.verbose !== false) console.log(` \u2713 Converted user SCD property: ${propKey}`);
|
|
360
380
|
} else {
|
|
361
381
|
// Add to groupProps for the specific group type
|
|
362
382
|
if (!config.groupProps[type]) {
|
|
363
383
|
config.groupProps[type] = {};
|
|
364
384
|
}
|
|
365
385
|
config.groupProps[type][propKey] = values;
|
|
366
|
-
console.log(` \u2713 Converted group SCD property: ${propKey} (${type})`);
|
|
386
|
+
if (config.verbose !== false) console.log(` \u2713 Converted group SCD property: ${propKey} (${type})`);
|
|
367
387
|
}
|
|
368
388
|
}
|
|
369
389
|
|
|
370
390
|
// Clear out scdProps since we've converted everything
|
|
371
391
|
config.scdProps = {};
|
|
372
|
-
console.log('\u2713 SCD properties converted to static properties\n');
|
|
392
|
+
if (config.verbose !== false) console.log('\u2713 SCD properties converted to static properties\n');
|
|
373
393
|
}
|
|
374
394
|
|
|
375
395
|
export { inferFunnels, transformSCDPropsWithoutCredentials };
|
package/lib/core/storage.js
CHANGED
|
@@ -366,12 +366,12 @@ export class StorageManager {
|
|
|
366
366
|
if (config.writeToDisk === false) {
|
|
367
367
|
const batchSize = config.batchSize || 1_000_000;
|
|
368
368
|
const numEvents = config.numEvents || 0;
|
|
369
|
-
|
|
369
|
+
|
|
370
370
|
if (batchSize < numEvents) {
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
`
|
|
374
|
-
`
|
|
371
|
+
console.warn(
|
|
372
|
+
`⚠️ writeToDisk is false but batchSize (${batchSize.toLocaleString()}) < numEvents (${numEvents.toLocaleString()}). ` +
|
|
373
|
+
`Batch files will be written to disk temporarily to avoid OOM. ` +
|
|
374
|
+
`They will be cleaned up after Mixpanel import if a token is provided.`
|
|
375
375
|
);
|
|
376
376
|
}
|
|
377
377
|
}
|
package/lib/generators/events.js
CHANGED
|
@@ -52,9 +52,9 @@ export async function makeEvent(
|
|
|
52
52
|
const chance = u.getChance();
|
|
53
53
|
|
|
54
54
|
// Extract soup configuration for time distribution
|
|
55
|
-
// Dynamic peaks:
|
|
56
|
-
const defaultPeaks = Math.max(5,
|
|
57
|
-
const { mean = 0, deviation = 2, peaks = defaultPeaks } = config.soup || {};
|
|
55
|
+
// Dynamic peaks: enough to flatten DOW interference from chunk boundaries
|
|
56
|
+
const defaultPeaks = Math.max(5, (config.numDays || 30) * 2);
|
|
57
|
+
const { mean = 0, deviation = 2, peaks = defaultPeaks, dayOfWeekWeights, hourOfDayWeights } = /** @type {import('../../types').SoupConfig} */ (config.soup) || {};
|
|
58
58
|
|
|
59
59
|
// Extract feature flags from config
|
|
60
60
|
const {
|
|
@@ -102,7 +102,7 @@ export async function makeEvent(
|
|
|
102
102
|
shiftedTimestamp = earliestTime + context.TIME_SHIFT_SECONDS;
|
|
103
103
|
} else {
|
|
104
104
|
// TimeSoup returns unix seconds; shift and convert to ISO once
|
|
105
|
-
const soupTimestamp = u.TimeSoup(earliestTime, context.FIXED_NOW, peaks, deviation, mean);
|
|
105
|
+
const soupTimestamp = u.TimeSoup(earliestTime, context.FIXED_NOW, peaks, deviation, mean, dayOfWeekWeights, hourOfDayWeights, context.TIME_SHIFT_SECONDS);
|
|
106
106
|
shiftedTimestamp = soupTimestamp + context.TIME_SHIFT_SECONDS;
|
|
107
107
|
}
|
|
108
108
|
// Drop events that would land in the future (Mixpanel rewrites these to "now", causing pile-ups)
|
|
@@ -6,7 +6,6 @@
|
|
|
6
6
|
/** @typedef {import('../../types').Context} Context */
|
|
7
7
|
|
|
8
8
|
import dayjs from "dayjs";
|
|
9
|
-
import path from "path";
|
|
10
9
|
import { comma, ls, rm } from "ak-tools";
|
|
11
10
|
import * as u from "../utils/utils.js";
|
|
12
11
|
import mp from "mixpanel-import";
|
|
@@ -22,8 +21,6 @@ export async function sendToMixpanel(context) {
|
|
|
22
21
|
adSpendData,
|
|
23
22
|
eventData,
|
|
24
23
|
groupProfilesData,
|
|
25
|
-
lookupTableData,
|
|
26
|
-
mirrorEventData,
|
|
27
24
|
scdTableData,
|
|
28
25
|
userProfilesData,
|
|
29
26
|
groupEventData
|
|
@@ -41,7 +38,7 @@ export async function sendToMixpanel(context) {
|
|
|
41
38
|
|
|
42
39
|
const importResults = { events: {}, users: {}, groups: [] };
|
|
43
40
|
const isBATCH_MODE = context.isBatchMode();
|
|
44
|
-
|
|
41
|
+
_verbose = config.verbose !== false;
|
|
45
42
|
|
|
46
43
|
/** @type {import('mixpanel-import').Creds} */
|
|
47
44
|
const creds = { token };
|
|
@@ -236,15 +233,20 @@ export async function sendToMixpanel(context) {
|
|
|
236
233
|
if (!writeToDisk && isBATCH_MODE) {
|
|
237
234
|
const writeDir = eventData?.getWriteDir?.() || userProfilesData?.getWriteDir?.();
|
|
238
235
|
if (writeDir) {
|
|
236
|
+
const configName = context.config.name;
|
|
239
237
|
const listDir = await ls(writeDir);
|
|
240
238
|
// @ts-ignore
|
|
241
|
-
const files = listDir.filter(f =>
|
|
242
|
-
f.includes(
|
|
243
|
-
f.includes('-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
239
|
+
const files = listDir.filter(f => {
|
|
240
|
+
if (configName && !f.includes(configName)) return false;
|
|
241
|
+
return f.includes('-EVENTS') ||
|
|
242
|
+
f.includes('-USERS') ||
|
|
243
|
+
f.includes('-ADSPEND') ||
|
|
244
|
+
f.includes('-GROUPS') ||
|
|
245
|
+
f.includes('-GROUP-EVENTS') ||
|
|
246
|
+
f.includes('-SCD') ||
|
|
247
|
+
f.includes('-MIRROR') ||
|
|
248
|
+
f.includes('-LOOKUP');
|
|
249
|
+
});
|
|
248
250
|
for (const file of files) {
|
|
249
251
|
await rm(file);
|
|
250
252
|
}
|
|
@@ -255,9 +257,10 @@ export async function sendToMixpanel(context) {
|
|
|
255
257
|
}
|
|
256
258
|
|
|
257
259
|
/**
|
|
258
|
-
*
|
|
260
|
+
* Logging function that respects verbose config
|
|
259
261
|
* @param {string} message - Message to log
|
|
260
262
|
*/
|
|
263
|
+
let _verbose = true;
|
|
261
264
|
function log(message) {
|
|
262
|
-
console.log(message);
|
|
265
|
+
if (_verbose) console.log(message);
|
|
263
266
|
}
|
|
@@ -59,6 +59,7 @@ export async function userLoop(context) {
|
|
|
59
59
|
let cancelled = false;
|
|
60
60
|
const onSigint = () => {
|
|
61
61
|
cancelled = true;
|
|
62
|
+
USER_CONN.clearQueue();
|
|
62
63
|
if (verbose) console.log(`\n\nStopping generation (Ctrl+C)...\n`);
|
|
63
64
|
};
|
|
64
65
|
process.on('SIGINT', onSigint);
|
|
@@ -109,7 +110,7 @@ export async function userLoop(context) {
|
|
|
109
110
|
if (userIsBornInDataset) {
|
|
110
111
|
let biasedCreated = dayjs(created).subtract(daysShift, 'd');
|
|
111
112
|
|
|
112
|
-
if (bornRecentBias
|
|
113
|
+
if (bornRecentBias !== 0) {
|
|
113
114
|
// Calculate how far into the dataset this user was born (0 = start, 1 = end/recent)
|
|
114
115
|
const datasetStart = dayjs.unix(global.FIXED_BEGIN);
|
|
115
116
|
const datasetEnd = dayjs.unix(context.FIXED_NOW);
|
|
@@ -117,10 +118,17 @@ export async function userLoop(context) {
|
|
|
117
118
|
// Clamp userPosition to [0, 1] to handle edge cases from rounding in time calculations
|
|
118
119
|
const userPosition = Math.max(0, Math.min(1, biasedCreated.diff(datasetStart) / totalDuration));
|
|
119
120
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
121
|
+
let biasedPosition;
|
|
122
|
+
if (bornRecentBias > 0) {
|
|
123
|
+
// Positive bias: exponent < 1 shifts distribution toward 1 (recent)
|
|
124
|
+
const exponent = 1 - (bornRecentBias * 0.7); // 0.3 bias -> 0.79 exponent (gentle nudge)
|
|
125
|
+
biasedPosition = Math.pow(userPosition, exponent);
|
|
126
|
+
} else {
|
|
127
|
+
// Negative bias: mirror the power function to shift toward 0 (early)
|
|
128
|
+
// -0.3 bias -> 0.79 exponent applied to (1 - position), then mirrored back
|
|
129
|
+
const exponent = 1 - (Math.abs(bornRecentBias) * 0.7);
|
|
130
|
+
biasedPosition = 1 - Math.pow(1 - userPosition, exponent);
|
|
131
|
+
}
|
|
124
132
|
|
|
125
133
|
// Convert back to timestamp
|
|
126
134
|
biasedCreated = datasetStart.add(biasedPosition * totalDuration, 'millisecond');
|
|
@@ -233,7 +241,7 @@ export async function userLoop(context) {
|
|
|
233
241
|
|
|
234
242
|
// ALL SUBSEQUENT EVENTS (funnels for converted users, standalone for all)
|
|
235
243
|
let userChurned = false;
|
|
236
|
-
while (numEventsPreformed < numEventsThisUserWillPreform) {
|
|
244
|
+
while (numEventsPreformed < numEventsThisUserWillPreform && !cancelled) {
|
|
237
245
|
let newEvents;
|
|
238
246
|
if (usageFunnels.length && userConverted) {
|
|
239
247
|
const currentFunnel = chance.pickone(usageFunnels);
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TimeSoup preset configurations
|
|
3
|
+
* Each preset defines time distribution parameters that produce distinct patterns.
|
|
4
|
+
*
|
|
5
|
+
* Parameters:
|
|
6
|
+
* - peaks(numDays): function returning number of Gaussian clusters
|
|
7
|
+
* - deviation: controls peak width (higher = tighter)
|
|
8
|
+
* - mean: offset from chunk center (0 = centered)
|
|
9
|
+
* - dayOfWeekWeights: 7-element array [Sun..Sat], max=1.0, null to disable
|
|
10
|
+
* - hourOfDayWeights: 24-element array [0h..23h UTC], max=1.0, null to disable
|
|
11
|
+
*
|
|
12
|
+
* Some presets also suggest bornRecentBias and percentUsersBornInDataset,
|
|
13
|
+
* but those are top-level dungeon config — presets only set them if not already specified.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
// Real-world Mixpanel DOW pattern: weekday-heavy, Saturday valley
|
|
17
|
+
export const REAL_DOW = [0.637, 1.0, 0.999, 0.998, 0.966, 0.802, 0.528];
|
|
18
|
+
|
|
19
|
+
// Real-world Mixpanel HOD pattern: early-morning peak (UTC), afternoon valley
|
|
20
|
+
export const REAL_HOD = [
|
|
21
|
+
0.949, 0.992, 0.998, 0.946, 0.895, 0.938, 1.0, 0.997,
|
|
22
|
+
0.938, 0.894, 0.827, 0.786, 0.726, 0.699, 0.688, 0.643,
|
|
23
|
+
0.584, 0.574, 0.554, 0.576, 0.604, 0.655, 0.722, 0.816
|
|
24
|
+
];
|
|
25
|
+
|
|
26
|
+
// Flat weights (no cyclical pattern)
|
|
27
|
+
export const FLAT_DOW = [1, 1, 1, 1, 1, 1, 1];
|
|
28
|
+
export const FLAT_HOD = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
|
29
|
+
|
|
30
|
+
/** @type {Record<string, {peaks: (numDays: number) => number, deviation: number, mean: number, dayOfWeekWeights: number[]|null, hourOfDayWeights: number[]|null, bornRecentBias?: number, percentUsersBornInDataset?: number}>} */
|
|
31
|
+
export const SOUP_PRESETS = {
|
|
32
|
+
/**
|
|
33
|
+
* steady — Mature SaaS / Stable Product
|
|
34
|
+
* Nearly flat day-over-day, slight weekly pattern, minimal growth trend.
|
|
35
|
+
*/
|
|
36
|
+
steady: {
|
|
37
|
+
peaks: (numDays) => Math.max(5, numDays * 2),
|
|
38
|
+
deviation: 1.5,
|
|
39
|
+
mean: 0,
|
|
40
|
+
dayOfWeekWeights: REAL_DOW,
|
|
41
|
+
hourOfDayWeights: REAL_HOD,
|
|
42
|
+
bornRecentBias: 0.1,
|
|
43
|
+
percentUsersBornInDataset: 10,
|
|
44
|
+
},
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* growth — Growing Startup (DEFAULT)
|
|
48
|
+
* Gradual uptrend with visible weekly peaks. This is the default behavior.
|
|
49
|
+
*/
|
|
50
|
+
growth: {
|
|
51
|
+
peaks: (numDays) => Math.max(5, numDays * 2),
|
|
52
|
+
deviation: 2,
|
|
53
|
+
mean: 0,
|
|
54
|
+
dayOfWeekWeights: REAL_DOW,
|
|
55
|
+
hourOfDayWeights: REAL_HOD,
|
|
56
|
+
bornRecentBias: 0.3,
|
|
57
|
+
percentUsersBornInDataset: 15,
|
|
58
|
+
},
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* spiky — Event-Driven / Bursty
|
|
62
|
+
* Clear peaks and valleys, dramatic variation. Fewer Gaussian clusters + tight deviation.
|
|
63
|
+
*/
|
|
64
|
+
spiky: {
|
|
65
|
+
peaks: (numDays) => Math.max(5, Math.ceil(numDays / 10)),
|
|
66
|
+
deviation: 3.5,
|
|
67
|
+
mean: 0,
|
|
68
|
+
dayOfWeekWeights: REAL_DOW,
|
|
69
|
+
hourOfDayWeights: REAL_HOD,
|
|
70
|
+
bornRecentBias: 0.3,
|
|
71
|
+
percentUsersBornInDataset: 20,
|
|
72
|
+
},
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* seasonal — Strong Cyclical Patterns
|
|
76
|
+
* 3-4 major waves across the dataset. Very few peaks create dramatic macro trends.
|
|
77
|
+
*/
|
|
78
|
+
seasonal: {
|
|
79
|
+
peaks: () => 4,
|
|
80
|
+
deviation: 2.5,
|
|
81
|
+
mean: 0,
|
|
82
|
+
dayOfWeekWeights: REAL_DOW,
|
|
83
|
+
hourOfDayWeights: REAL_HOD,
|
|
84
|
+
bornRecentBias: 0.2,
|
|
85
|
+
percentUsersBornInDataset: 25,
|
|
86
|
+
},
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* global — Distributed Users Across Timezones
|
|
90
|
+
* Very flat hourly + daily distribution. No cyclical patterns.
|
|
91
|
+
*/
|
|
92
|
+
global: {
|
|
93
|
+
peaks: (numDays) => Math.max(5, numDays * 2),
|
|
94
|
+
deviation: 1,
|
|
95
|
+
mean: 0,
|
|
96
|
+
dayOfWeekWeights: FLAT_DOW,
|
|
97
|
+
hourOfDayWeights: FLAT_HOD,
|
|
98
|
+
bornRecentBias: 0,
|
|
99
|
+
percentUsersBornInDataset: 10,
|
|
100
|
+
},
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* churny — High Churn / Declining Product
|
|
104
|
+
* Flat distribution (no growth trend). All users pre-exist the dataset,
|
|
105
|
+
* so there's no acceleration. Combine with an "everything" hook that
|
|
106
|
+
* filters late events to create a true declining shape.
|
|
107
|
+
*/
|
|
108
|
+
churny: {
|
|
109
|
+
peaks: (numDays) => Math.max(5, numDays * 2),
|
|
110
|
+
deviation: 2,
|
|
111
|
+
mean: 0,
|
|
112
|
+
dayOfWeekWeights: REAL_DOW,
|
|
113
|
+
hourOfDayWeights: REAL_HOD,
|
|
114
|
+
bornRecentBias: 0,
|
|
115
|
+
percentUsersBornInDataset: 5,
|
|
116
|
+
},
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* chaotic — Unpredictable / Irregular Patterns
|
|
120
|
+
* Few peaks + very tight clustering = dramatic bursts separated by quiet stretches.
|
|
121
|
+
*/
|
|
122
|
+
chaotic: {
|
|
123
|
+
peaks: (numDays) => Math.max(3, Math.ceil(numDays / 20)),
|
|
124
|
+
deviation: 4,
|
|
125
|
+
mean: 0,
|
|
126
|
+
dayOfWeekWeights: REAL_DOW,
|
|
127
|
+
hourOfDayWeights: REAL_HOD,
|
|
128
|
+
bornRecentBias: 0.5,
|
|
129
|
+
percentUsersBornInDataset: 40,
|
|
130
|
+
},
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
/** @type {string[]} */
|
|
134
|
+
export const PRESET_NAMES = Object.keys(SOUP_PRESETS);
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* Resolves a soup config — handles string presets, preset+overrides, and raw objects.
|
|
138
|
+
* @param {string | object} soup - Soup config from dungeon
|
|
139
|
+
* @param {number} numDays - Number of days in the dataset
|
|
140
|
+
* @returns {{ soup: object, suggestedBornRecentBias?: number, suggestedPercentUsersBornInDataset?: number }}
|
|
141
|
+
*/
|
|
142
|
+
export function resolveSoup(soup, numDays) {
|
|
143
|
+
if (!soup) return { soup: {} };
|
|
144
|
+
|
|
145
|
+
// String preset: "growth", "spiky", etc.
|
|
146
|
+
if (typeof soup === 'string') {
|
|
147
|
+
const preset = SOUP_PRESETS[soup];
|
|
148
|
+
if (!preset) {
|
|
149
|
+
throw new Error(`Unknown soup preset: "${soup}". Valid presets: ${PRESET_NAMES.join(', ')}`);
|
|
150
|
+
}
|
|
151
|
+
return {
|
|
152
|
+
soup: {
|
|
153
|
+
peaks: preset.peaks(numDays),
|
|
154
|
+
deviation: preset.deviation,
|
|
155
|
+
mean: preset.mean,
|
|
156
|
+
dayOfWeekWeights: preset.dayOfWeekWeights,
|
|
157
|
+
hourOfDayWeights: preset.hourOfDayWeights,
|
|
158
|
+
},
|
|
159
|
+
suggestedBornRecentBias: preset.bornRecentBias,
|
|
160
|
+
suggestedPercentUsersBornInDataset: preset.percentUsersBornInDataset,
|
|
161
|
+
};
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Object with preset key: { preset: "growth", deviation: 3 }
|
|
165
|
+
if (typeof soup === 'object' && soup.preset) {
|
|
166
|
+
const preset = SOUP_PRESETS[soup.preset];
|
|
167
|
+
if (!preset) {
|
|
168
|
+
throw new Error(`Unknown soup preset: "${soup.preset}". Valid presets: ${PRESET_NAMES.join(', ')}`);
|
|
169
|
+
}
|
|
170
|
+
const base = {
|
|
171
|
+
peaks: preset.peaks(numDays),
|
|
172
|
+
deviation: preset.deviation,
|
|
173
|
+
mean: preset.mean,
|
|
174
|
+
dayOfWeekWeights: preset.dayOfWeekWeights,
|
|
175
|
+
hourOfDayWeights: preset.hourOfDayWeights,
|
|
176
|
+
};
|
|
177
|
+
// Apply overrides (excluding the 'preset' key itself)
|
|
178
|
+
const { preset: _, ...overrides } = soup;
|
|
179
|
+
return {
|
|
180
|
+
soup: { ...base, ...overrides },
|
|
181
|
+
suggestedBornRecentBias: preset.bornRecentBias,
|
|
182
|
+
suggestedPercentUsersBornInDataset: preset.percentUsersBornInDataset,
|
|
183
|
+
};
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// Raw object: { peaks: 10, deviation: 2 } — pass through unchanged
|
|
187
|
+
return { soup };
|
|
188
|
+
}
|
package/lib/utils/utils.js
CHANGED
|
@@ -1180,7 +1180,17 @@ let soupHits = 0;
|
|
|
1180
1180
|
* Divides the range into `peaks` chunks, picks one randomly, then samples within it.
|
|
1181
1181
|
* Returns unix seconds (not ISO string) for performance — caller converts once.
|
|
1182
1182
|
*/
|
|
1183
|
-
|
|
1183
|
+
// Default day-of-week weights (0=Sun, 1=Mon, ..., 6=Sat) — derived from real Mixpanel data
|
|
1184
|
+
const DEFAULT_DOW_WEIGHTS = [0.637, 1.0, 0.999, 0.998, 0.966, 0.802, 0.528];
|
|
1185
|
+
|
|
1186
|
+
// Default hour-of-day weights (0=midnight, ..., 23=11pm UTC) — derived from real Mixpanel data
|
|
1187
|
+
const DEFAULT_HOD_WEIGHTS = [
|
|
1188
|
+
0.949, 0.992, 0.998, 0.946, 0.895, 0.938, 1.0, 0.997,
|
|
1189
|
+
0.938, 0.894, 0.827, 0.786, 0.726, 0.699, 0.688, 0.643,
|
|
1190
|
+
0.584, 0.574, 0.554, 0.576, 0.604, 0.655, 0.722, 0.816
|
|
1191
|
+
];
|
|
1192
|
+
|
|
1193
|
+
function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0, dayOfWeekWeights = DEFAULT_DOW_WEIGHTS, hourOfDayWeights = DEFAULT_HOD_WEIGHTS, timeShiftSeconds = 0) {
|
|
1184
1194
|
if (!earliestTime) earliestTime = global.FIXED_BEGIN ? global.FIXED_BEGIN : dayjs().subtract(30, 'd').unix();
|
|
1185
1195
|
if (!latestTime) latestTime = global.FIXED_NOW ? global.FIXED_NOW : dayjs().unix();
|
|
1186
1196
|
const chance = getChance();
|
|
@@ -1193,21 +1203,57 @@ function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0)
|
|
|
1193
1203
|
}
|
|
1194
1204
|
const chunkSize = totalRange / peaks;
|
|
1195
1205
|
|
|
1196
|
-
//
|
|
1206
|
+
// Phase 1: Gaussian chunk sampling (macro trend across the time range)
|
|
1197
1207
|
const peakIndex = integer(0, peaks - 1);
|
|
1198
1208
|
const chunkStart = earliestTime + peakIndex * chunkSize;
|
|
1199
1209
|
const chunkEnd = chunkStart + chunkSize;
|
|
1200
1210
|
const chunkMid = (chunkStart + chunkEnd) / 2;
|
|
1201
|
-
|
|
1202
|
-
// Generate offset from normal distribution, clamp to chunk boundaries
|
|
1203
1211
|
const maxDeviation = chunkSize / deviation;
|
|
1204
1212
|
const offset = chance.normal({ mean: mean, dev: maxDeviation });
|
|
1205
1213
|
const proposedTime = chunkMid + offset;
|
|
1206
1214
|
const clampedTime = Math.max(chunkStart, Math.min(chunkEnd, proposedTime));
|
|
1207
|
-
|
|
1215
|
+
let candidate = Math.max(earliestTime, Math.min(latestTime, clampedTime));
|
|
1216
|
+
|
|
1217
|
+
// Phase 2: DOW accept/reject — retry if day-of-week doesn't pass weight check
|
|
1218
|
+
if (dayOfWeekWeights) {
|
|
1219
|
+
for (let attempt = 0; attempt < 50; attempt++) {
|
|
1220
|
+
const dow = new Date((candidate + timeShiftSeconds) * 1000).getUTCDay();
|
|
1221
|
+
if (chance.random() < dayOfWeekWeights[dow]) break;
|
|
1222
|
+
// Rejected — resample from Gaussian chunks
|
|
1223
|
+
const pi = integer(0, peaks - 1);
|
|
1224
|
+
const cs = earliestTime + pi * chunkSize;
|
|
1225
|
+
const ce = cs + chunkSize;
|
|
1226
|
+
const cm = (cs + ce) / 2;
|
|
1227
|
+
const md = chunkSize / deviation;
|
|
1228
|
+
const off = chance.normal({ mean: mean, dev: md });
|
|
1229
|
+
const pt = cm + off;
|
|
1230
|
+
candidate = Math.max(earliestTime, Math.min(latestTime, Math.max(cs, Math.min(ce, pt))));
|
|
1231
|
+
}
|
|
1232
|
+
}
|
|
1233
|
+
|
|
1234
|
+
// Phase 3: Redistribute hour-of-day (changes only hour within same day)
|
|
1235
|
+
if (hourOfDayWeights) {
|
|
1236
|
+
const shifted = candidate + timeShiftSeconds;
|
|
1237
|
+
const d = new Date(shifted * 1000);
|
|
1238
|
+
const currentMinute = d.getUTCMinutes();
|
|
1239
|
+
const currentSecond = d.getUTCSeconds();
|
|
1240
|
+
|
|
1241
|
+
const totalHodWeight = hourOfDayWeights.reduce((s, w) => s + w, 0);
|
|
1242
|
+
let roll = chance.random() * totalHodWeight;
|
|
1243
|
+
let newHour = 0;
|
|
1244
|
+
for (let h = 0; h < 24; h++) {
|
|
1245
|
+
roll -= hourOfDayWeights[h];
|
|
1246
|
+
if (roll <= 0) { newHour = h; break; }
|
|
1247
|
+
}
|
|
1248
|
+
|
|
1249
|
+
const dayStartShifted = Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate()) / 1000;
|
|
1250
|
+
const newShifted = dayStartShifted + newHour * 3600 + currentMinute * 60 + currentSecond;
|
|
1251
|
+
candidate = newShifted - timeShiftSeconds;
|
|
1252
|
+
candidate = Math.max(earliestTime, Math.min(latestTime, candidate));
|
|
1253
|
+
}
|
|
1208
1254
|
|
|
1209
1255
|
soupHits++;
|
|
1210
|
-
return
|
|
1256
|
+
return candidate;
|
|
1211
1257
|
}
|
|
1212
1258
|
|
|
1213
1259
|
|