make-mp-data 3.0.4 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -0
- package/dungeons/array-of-object-lookup-schema.json +327 -0
- package/dungeons/array-of-object-lookup.js +28 -8
- package/dungeons/capstone/capstone-ic3.js +291 -0
- package/dungeons/capstone/capstone-ic4.js +598 -0
- package/dungeons/capstone/capstone-ic5.js +668 -0
- package/dungeons/capstone/generate-product-lookup.js +309 -0
- package/dungeons/ecommerce-schema.json +462 -0
- package/dungeons/{copilot.js → ecommerce.js} +77 -15
- package/dungeons/education-schema.json +2409 -0
- package/dungeons/education.js +206 -442
- package/dungeons/fintech-schema.json +14034 -0
- package/dungeons/fintech.js +110 -389
- package/dungeons/foobar-schema.json +403 -0
- package/dungeons/foobar.js +27 -4
- package/dungeons/food-delivery-schema.json +192 -0
- package/dungeons/food-delivery.js +602 -0
- package/dungeons/food-schema.json +1152 -0
- package/dungeons/food.js +150 -383
- package/dungeons/gaming-schema.json +1270 -0
- package/dungeons/gaming.js +143 -3
- package/dungeons/insurance-application-schema.json +204 -0
- package/dungeons/insurance-application.js +605 -0
- package/dungeons/media-schema.json +906 -0
- package/dungeons/media.js +221 -391
- package/dungeons/retention-cadence-schema.json +78 -0
- package/dungeons/retention-cadence.js +35 -1
- package/dungeons/rpg-schema.json +4526 -0
- package/dungeons/rpg.js +130 -388
- package/dungeons/sanity-schema.json +255 -0
- package/dungeons/sanity.js +21 -10
- package/dungeons/sass-schema.json +1291 -0
- package/dungeons/sass.js +210 -337
- package/dungeons/scd-schema.json +919 -0
- package/dungeons/scd.js +38 -10
- package/dungeons/simple-schema.json +608 -0
- package/dungeons/simple.js +48 -11
- package/dungeons/simplest-schema.json +1418 -0
- package/dungeons/simplest.js +392 -0
- package/dungeons/social-schema.json +1118 -0
- package/dungeons/social.js +124 -365
- package/dungeons/text-generation-schema.json +3096 -0
- package/dungeons/text-generation.js +71 -0
- package/index.js +6 -3
- package/lib/core/config-validator.js +18 -0
- package/lib/core/storage.js +5 -5
- package/lib/generators/events.js +4 -4
- package/lib/orchestrators/mixpanel-sender.js +12 -7
- package/lib/orchestrators/user-loop.js +14 -6
- package/lib/templates/soup-presets.js +188 -0
- package/lib/utils/utils.js +52 -6
- package/package.json +1 -1
- package/types.d.ts +20 -3
- package/dungeons/adspend.js +0 -117
- package/dungeons/anon.js +0 -128
- package/dungeons/benchmark-heavy.js +0 -240
- package/dungeons/benchmark-light.js +0 -126
- package/dungeons/big.js +0 -226
- package/dungeons/business.js +0 -391
- package/dungeons/complex.js +0 -428
- package/dungeons/experiments.js +0 -137
- package/dungeons/funnels.js +0 -309
- package/dungeons/mil.js +0 -323
- package/dungeons/mirror.js +0 -160
- package/dungeons/soup-test.js +0 -52
- package/dungeons/streaming.js +0 -372
- package/dungeons/strict-event-test.js +0 -30
- package/dungeons/student-teacher.js +0 -438
- package/dungeons/too-big-events.js +0 -203
- package/dungeons/user-agent.js +0 -209
|
@@ -12,6 +12,77 @@ const days = 92;
|
|
|
12
12
|
|
|
13
13
|
/** @typedef {import("../types.js").Dungeon} Dungeon */
|
|
14
14
|
|
|
15
|
+
/*
|
|
16
|
+
* ============================================================================
|
|
17
|
+
* DATASET OVERVIEW
|
|
18
|
+
* ============================================================================
|
|
19
|
+
*
|
|
20
|
+
* App: Text Generation Demo — showcases DM4's organic text generation
|
|
21
|
+
* Scale: 8,000 users, ~960K events, 92 days
|
|
22
|
+
*
|
|
23
|
+
* A text-heavy SaaS analytics dungeon that exercises every text generation
|
|
24
|
+
* style available in DM4. Users interact through support tickets, product
|
|
25
|
+
* reviews, forum posts, search queries, chat messages, social media posts
|
|
26
|
+
* (Twitter, LinkedIn, Reddit), bug reports, feature requests, onboarding
|
|
27
|
+
* feedback, charity/wedding comments, and webinar chat.
|
|
28
|
+
*
|
|
29
|
+
* Each event type uses a dedicated createTextGenerator() configured with its
|
|
30
|
+
* own style, tone, formality, keyword banks, typo rates, and authenticity
|
|
31
|
+
* levels. Text generator styles used: support, review, forum, search,
|
|
32
|
+
* feedback, chat, email, tweet, comments (9 distinct styles).
|
|
33
|
+
*
|
|
34
|
+
* Events:
|
|
35
|
+
* social_media_tweet (15) > chat_message (10) > charity_comment_posted (8)
|
|
36
|
+
* > wedding_comment_posted (6) > company_announcement_tweet (3)
|
|
37
|
+
* > all others (1 each): support ticket, review, forum post, search,
|
|
38
|
+
* feedback, email, twitter, linkedin, reddit, bug report, feature
|
|
39
|
+
* request, onboarding, tutorial comment, webinar chat, api thread
|
|
40
|
+
* ============================================================================
|
|
41
|
+
*/
|
|
42
|
+
|
|
43
|
+
/*
|
|
44
|
+
* ============================================================================
|
|
45
|
+
* ANALYTICS HOOKS
|
|
46
|
+
* ============================================================================
|
|
47
|
+
*
|
|
48
|
+
* Hook 1: Power User + Churn Risk Classification
|
|
49
|
+
* Type: user
|
|
50
|
+
* What: Users with engagement_score > 70 get is_power_user = true.
|
|
51
|
+
* Users inactive > 20 days get risk_level = "high_churn", else "healthy".
|
|
52
|
+
* Mixpanel report:
|
|
53
|
+
* - Insights > user profile breakdown by "is_power_user"
|
|
54
|
+
* - Insights > user profile breakdown by "risk_level", cross-reference
|
|
55
|
+
* with "user_tier" to see churn risk by plan
|
|
56
|
+
*
|
|
57
|
+
* Hook 2: Critical Ticket Auto-Escalation
|
|
58
|
+
* Type: event
|
|
59
|
+
* What: enterprise_support_ticket events with priority = "critical" get
|
|
60
|
+
* escalation_level bumped by 1 (max 3) and auto_escalated = true.
|
|
61
|
+
* Mixpanel report:
|
|
62
|
+
* - Insights > "enterprise_support_ticket" total events, breakdown by
|
|
63
|
+
* auto_escalated
|
|
64
|
+
* - Expect: critical tickets show auto_escalated = true
|
|
65
|
+
*
|
|
66
|
+
* Hook 3: Critical Bug Flagging
|
|
67
|
+
* Type: event
|
|
68
|
+
* What: bug_report_submitted events with severity = "critical" AND
|
|
69
|
+
* is_reproducible = true get requires_immediate_review = true and a
|
|
70
|
+
* random estimated_fix_hours (1-8).
|
|
71
|
+
* Mixpanel report:
|
|
72
|
+
* - Insights > "bug_report_submitted" total events, breakdown by
|
|
73
|
+
* requires_immediate_review
|
|
74
|
+
* - Expect: only critical + reproducible bugs are flagged
|
|
75
|
+
*
|
|
76
|
+
* Hook 4: Enterprise Satisfaction Survey Injection
|
|
77
|
+
* Type: everything
|
|
78
|
+
* What: Enterprise-tier users with > 5 events get a
|
|
79
|
+
* "satisfaction_survey_triggered" event appended with a 1-10 NPS score.
|
|
80
|
+
* Mixpanel report:
|
|
81
|
+
* - Insights > "satisfaction_survey_triggered" AVG(score), breakdown by
|
|
82
|
+
* product_tier
|
|
83
|
+
* - Expect: only enterprise users have survey events
|
|
84
|
+
* ============================================================================
|
|
85
|
+
*/
|
|
15
86
|
|
|
16
87
|
// Enterprise support ticket generator with keywords and high authenticity
|
|
17
88
|
const enterpriseSupportGen = createTextGenerator({
|
package/index.js
CHANGED
|
@@ -197,13 +197,16 @@ async function main(config) {
|
|
|
197
197
|
|
|
198
198
|
// ! DATA GENERATION ENDS HERE
|
|
199
199
|
|
|
200
|
+
// Flush when writeToDisk is enabled OR batch mode activated (to capture tail data)
|
|
201
|
+
const shouldFlush = validatedConfig.writeToDisk || context.isBatchMode();
|
|
202
|
+
|
|
200
203
|
// Step 10: Flush lookup tables to disk (always as CSVs)
|
|
201
|
-
if (
|
|
204
|
+
if (shouldFlush) {
|
|
202
205
|
await flushLookupTablesToDisk(storage, validatedConfig);
|
|
203
206
|
}
|
|
204
207
|
|
|
205
|
-
// Step 11: Flush other storage containers to disk
|
|
206
|
-
if (
|
|
208
|
+
// Step 11: Flush other storage containers to disk
|
|
209
|
+
if (shouldFlush) {
|
|
207
210
|
await flushStorageToDisk(storage, validatedConfig);
|
|
208
211
|
}
|
|
209
212
|
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
import dayjs from "dayjs";
|
|
12
12
|
import { makeName } from "ak-tools";
|
|
13
13
|
import * as u from "../utils/utils.js";
|
|
14
|
+
import { resolveSoup } from "../templates/soup-presets.js";
|
|
14
15
|
|
|
15
16
|
/**
|
|
16
17
|
* Infers funnels from the provided events
|
|
@@ -136,6 +137,12 @@ export function validateDungeonConfig(config) {
|
|
|
136
137
|
concurrency = 1;
|
|
137
138
|
}
|
|
138
139
|
|
|
140
|
+
// Auto-enable batch mode for large datasets to prevent OOM
|
|
141
|
+
if (numEvents >= 2_000_000 && config.batchSize === undefined) {
|
|
142
|
+
batchSize = 1_000_000;
|
|
143
|
+
console.warn(`⚠️ Auto-enabling batch mode: numEvents (${numEvents.toLocaleString()}) >= 2M. Using batchSize of ${batchSize.toLocaleString()}.`);
|
|
144
|
+
}
|
|
145
|
+
|
|
139
146
|
// Ensure defaults for deep objects
|
|
140
147
|
if (!config.superProps) config.superProps = superProps;
|
|
141
148
|
if (!config.userProps || Object.keys(config?.userProps || {})) config.userProps = userProps;
|
|
@@ -148,6 +155,17 @@ export function validateDungeonConfig(config) {
|
|
|
148
155
|
throw new Error("Either epochStart or numDays must be provided");
|
|
149
156
|
}
|
|
150
157
|
|
|
158
|
+
// Resolve soup presets (must happen after numDays is computed)
|
|
159
|
+
const resolved = resolveSoup(soup, numDays);
|
|
160
|
+
soup = resolved.soup;
|
|
161
|
+
// Apply suggested birth distribution params if not explicitly set by the dungeon
|
|
162
|
+
if (resolved.suggestedBornRecentBias !== undefined && config.bornRecentBias === undefined) {
|
|
163
|
+
config.bornRecentBias = resolved.suggestedBornRecentBias;
|
|
164
|
+
}
|
|
165
|
+
if (resolved.suggestedPercentUsersBornInDataset !== undefined && config.percentUsersBornInDataset === undefined) {
|
|
166
|
+
config.percentUsersBornInDataset = resolved.suggestedPercentUsersBornInDataset;
|
|
167
|
+
}
|
|
168
|
+
|
|
151
169
|
// Use provided name if non-empty string, otherwise generate one
|
|
152
170
|
if (!name || name === "") {
|
|
153
171
|
name = makeName();
|
package/lib/core/storage.js
CHANGED
|
@@ -366,12 +366,12 @@ export class StorageManager {
|
|
|
366
366
|
if (config.writeToDisk === false) {
|
|
367
367
|
const batchSize = config.batchSize || 1_000_000;
|
|
368
368
|
const numEvents = config.numEvents || 0;
|
|
369
|
-
|
|
369
|
+
|
|
370
370
|
if (batchSize < numEvents) {
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
`
|
|
374
|
-
`
|
|
371
|
+
console.warn(
|
|
372
|
+
`⚠️ writeToDisk is false but batchSize (${batchSize.toLocaleString()}) < numEvents (${numEvents.toLocaleString()}). ` +
|
|
373
|
+
`Batch files will be written to disk temporarily to avoid OOM. ` +
|
|
374
|
+
`They will be cleaned up after Mixpanel import if a token is provided.`
|
|
375
375
|
);
|
|
376
376
|
}
|
|
377
377
|
}
|
package/lib/generators/events.js
CHANGED
|
@@ -52,9 +52,9 @@ export async function makeEvent(
|
|
|
52
52
|
const chance = u.getChance();
|
|
53
53
|
|
|
54
54
|
// Extract soup configuration for time distribution
|
|
55
|
-
// Dynamic peaks:
|
|
56
|
-
const defaultPeaks = Math.max(5,
|
|
57
|
-
const { mean = 0, deviation = 2, peaks = defaultPeaks } = config.soup || {};
|
|
55
|
+
// Dynamic peaks: enough to flatten DOW interference from chunk boundaries
|
|
56
|
+
const defaultPeaks = Math.max(5, (config.numDays || 30) * 2);
|
|
57
|
+
const { mean = 0, deviation = 2, peaks = defaultPeaks, dayOfWeekWeights, hourOfDayWeights } = /** @type {import('../../types').SoupConfig} */ (config.soup) || {};
|
|
58
58
|
|
|
59
59
|
// Extract feature flags from config
|
|
60
60
|
const {
|
|
@@ -102,7 +102,7 @@ export async function makeEvent(
|
|
|
102
102
|
shiftedTimestamp = earliestTime + context.TIME_SHIFT_SECONDS;
|
|
103
103
|
} else {
|
|
104
104
|
// TimeSoup returns unix seconds; shift and convert to ISO once
|
|
105
|
-
const soupTimestamp = u.TimeSoup(earliestTime, context.FIXED_NOW, peaks, deviation, mean);
|
|
105
|
+
const soupTimestamp = u.TimeSoup(earliestTime, context.FIXED_NOW, peaks, deviation, mean, dayOfWeekWeights, hourOfDayWeights, context.TIME_SHIFT_SECONDS);
|
|
106
106
|
shiftedTimestamp = soupTimestamp + context.TIME_SHIFT_SECONDS;
|
|
107
107
|
}
|
|
108
108
|
// Drop events that would land in the future (Mixpanel rewrites these to "now", causing pile-ups)
|
|
@@ -233,15 +233,20 @@ export async function sendToMixpanel(context) {
|
|
|
233
233
|
if (!writeToDisk && isBATCH_MODE) {
|
|
234
234
|
const writeDir = eventData?.getWriteDir?.() || userProfilesData?.getWriteDir?.();
|
|
235
235
|
if (writeDir) {
|
|
236
|
+
const configName = context.config.name;
|
|
236
237
|
const listDir = await ls(writeDir);
|
|
237
238
|
// @ts-ignore
|
|
238
|
-
const files = listDir.filter(f =>
|
|
239
|
-
f.includes(
|
|
240
|
-
f.includes('-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
239
|
+
const files = listDir.filter(f => {
|
|
240
|
+
if (configName && !f.includes(configName)) return false;
|
|
241
|
+
return f.includes('-EVENTS') ||
|
|
242
|
+
f.includes('-USERS') ||
|
|
243
|
+
f.includes('-ADSPEND') ||
|
|
244
|
+
f.includes('-GROUPS') ||
|
|
245
|
+
f.includes('-GROUP-EVENTS') ||
|
|
246
|
+
f.includes('-SCD') ||
|
|
247
|
+
f.includes('-MIRROR') ||
|
|
248
|
+
f.includes('-LOOKUP');
|
|
249
|
+
});
|
|
245
250
|
for (const file of files) {
|
|
246
251
|
await rm(file);
|
|
247
252
|
}
|
|
@@ -59,6 +59,7 @@ export async function userLoop(context) {
|
|
|
59
59
|
let cancelled = false;
|
|
60
60
|
const onSigint = () => {
|
|
61
61
|
cancelled = true;
|
|
62
|
+
USER_CONN.clearQueue();
|
|
62
63
|
if (verbose) console.log(`\n\nStopping generation (Ctrl+C)...\n`);
|
|
63
64
|
};
|
|
64
65
|
process.on('SIGINT', onSigint);
|
|
@@ -109,7 +110,7 @@ export async function userLoop(context) {
|
|
|
109
110
|
if (userIsBornInDataset) {
|
|
110
111
|
let biasedCreated = dayjs(created).subtract(daysShift, 'd');
|
|
111
112
|
|
|
112
|
-
if (bornRecentBias
|
|
113
|
+
if (bornRecentBias !== 0) {
|
|
113
114
|
// Calculate how far into the dataset this user was born (0 = start, 1 = end/recent)
|
|
114
115
|
const datasetStart = dayjs.unix(global.FIXED_BEGIN);
|
|
115
116
|
const datasetEnd = dayjs.unix(context.FIXED_NOW);
|
|
@@ -117,10 +118,17 @@ export async function userLoop(context) {
|
|
|
117
118
|
// Clamp userPosition to [0, 1] to handle edge cases from rounding in time calculations
|
|
118
119
|
const userPosition = Math.max(0, Math.min(1, biasedCreated.diff(datasetStart) / totalDuration));
|
|
119
120
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
121
|
+
let biasedPosition;
|
|
122
|
+
if (bornRecentBias > 0) {
|
|
123
|
+
// Positive bias: exponent < 1 shifts distribution toward 1 (recent)
|
|
124
|
+
const exponent = 1 - (bornRecentBias * 0.7); // 0.3 bias -> 0.79 exponent (gentle nudge)
|
|
125
|
+
biasedPosition = Math.pow(userPosition, exponent);
|
|
126
|
+
} else {
|
|
127
|
+
// Negative bias: mirror the power function to shift toward 0 (early)
|
|
128
|
+
// -0.3 bias -> 0.79 exponent applied to (1 - position), then mirrored back
|
|
129
|
+
const exponent = 1 - (Math.abs(bornRecentBias) * 0.7);
|
|
130
|
+
biasedPosition = 1 - Math.pow(1 - userPosition, exponent);
|
|
131
|
+
}
|
|
124
132
|
|
|
125
133
|
// Convert back to timestamp
|
|
126
134
|
biasedCreated = datasetStart.add(biasedPosition * totalDuration, 'millisecond');
|
|
@@ -233,7 +241,7 @@ export async function userLoop(context) {
|
|
|
233
241
|
|
|
234
242
|
// ALL SUBSEQUENT EVENTS (funnels for converted users, standalone for all)
|
|
235
243
|
let userChurned = false;
|
|
236
|
-
while (numEventsPreformed < numEventsThisUserWillPreform) {
|
|
244
|
+
while (numEventsPreformed < numEventsThisUserWillPreform && !cancelled) {
|
|
237
245
|
let newEvents;
|
|
238
246
|
if (usageFunnels.length && userConverted) {
|
|
239
247
|
const currentFunnel = chance.pickone(usageFunnels);
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TimeSoup preset configurations
|
|
3
|
+
* Each preset defines time distribution parameters that produce distinct patterns.
|
|
4
|
+
*
|
|
5
|
+
* Parameters:
|
|
6
|
+
* - peaks(numDays): function returning number of Gaussian clusters
|
|
7
|
+
* - deviation: controls peak width (higher = tighter)
|
|
8
|
+
* - mean: offset from chunk center (0 = centered)
|
|
9
|
+
* - dayOfWeekWeights: 7-element array [Sun..Sat], max=1.0, null to disable
|
|
10
|
+
* - hourOfDayWeights: 24-element array [0h..23h UTC], max=1.0, null to disable
|
|
11
|
+
*
|
|
12
|
+
* Some presets also suggest bornRecentBias and percentUsersBornInDataset,
|
|
13
|
+
* but those are top-level dungeon config — presets only set them if not already specified.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
// Real-world Mixpanel DOW pattern: weekday-heavy, Saturday valley
|
|
17
|
+
export const REAL_DOW = [0.637, 1.0, 0.999, 0.998, 0.966, 0.802, 0.528];
|
|
18
|
+
|
|
19
|
+
// Real-world Mixpanel HOD pattern: early-morning peak (UTC), afternoon valley
|
|
20
|
+
export const REAL_HOD = [
|
|
21
|
+
0.949, 0.992, 0.998, 0.946, 0.895, 0.938, 1.0, 0.997,
|
|
22
|
+
0.938, 0.894, 0.827, 0.786, 0.726, 0.699, 0.688, 0.643,
|
|
23
|
+
0.584, 0.574, 0.554, 0.576, 0.604, 0.655, 0.722, 0.816
|
|
24
|
+
];
|
|
25
|
+
|
|
26
|
+
// Flat weights (no cyclical pattern)
|
|
27
|
+
export const FLAT_DOW = [1, 1, 1, 1, 1, 1, 1];
|
|
28
|
+
export const FLAT_HOD = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1];
|
|
29
|
+
|
|
30
|
+
/** @type {Record<string, {peaks: (numDays: number) => number, deviation: number, mean: number, dayOfWeekWeights: number[]|null, hourOfDayWeights: number[]|null, bornRecentBias?: number, percentUsersBornInDataset?: number}>} */
|
|
31
|
+
export const SOUP_PRESETS = {
|
|
32
|
+
/**
|
|
33
|
+
* steady — Mature SaaS / Stable Product
|
|
34
|
+
* Nearly flat day-over-day, slight weekly pattern, minimal growth trend.
|
|
35
|
+
*/
|
|
36
|
+
steady: {
|
|
37
|
+
peaks: (numDays) => Math.max(5, numDays * 2),
|
|
38
|
+
deviation: 1.5,
|
|
39
|
+
mean: 0,
|
|
40
|
+
dayOfWeekWeights: REAL_DOW,
|
|
41
|
+
hourOfDayWeights: REAL_HOD,
|
|
42
|
+
bornRecentBias: 0.1,
|
|
43
|
+
percentUsersBornInDataset: 10,
|
|
44
|
+
},
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* growth — Growing Startup (DEFAULT)
|
|
48
|
+
* Gradual uptrend with visible weekly peaks. This is the default behavior.
|
|
49
|
+
*/
|
|
50
|
+
growth: {
|
|
51
|
+
peaks: (numDays) => Math.max(5, numDays * 2),
|
|
52
|
+
deviation: 2,
|
|
53
|
+
mean: 0,
|
|
54
|
+
dayOfWeekWeights: REAL_DOW,
|
|
55
|
+
hourOfDayWeights: REAL_HOD,
|
|
56
|
+
bornRecentBias: 0.3,
|
|
57
|
+
percentUsersBornInDataset: 15,
|
|
58
|
+
},
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* spiky — Event-Driven / Bursty
|
|
62
|
+
* Clear peaks and valleys, dramatic variation. Fewer Gaussian clusters + tight deviation.
|
|
63
|
+
*/
|
|
64
|
+
spiky: {
|
|
65
|
+
peaks: (numDays) => Math.max(5, Math.ceil(numDays / 10)),
|
|
66
|
+
deviation: 3.5,
|
|
67
|
+
mean: 0,
|
|
68
|
+
dayOfWeekWeights: REAL_DOW,
|
|
69
|
+
hourOfDayWeights: REAL_HOD,
|
|
70
|
+
bornRecentBias: 0.3,
|
|
71
|
+
percentUsersBornInDataset: 20,
|
|
72
|
+
},
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* seasonal — Strong Cyclical Patterns
|
|
76
|
+
* 3-4 major waves across the dataset. Very few peaks create dramatic macro trends.
|
|
77
|
+
*/
|
|
78
|
+
seasonal: {
|
|
79
|
+
peaks: () => 4,
|
|
80
|
+
deviation: 2.5,
|
|
81
|
+
mean: 0,
|
|
82
|
+
dayOfWeekWeights: REAL_DOW,
|
|
83
|
+
hourOfDayWeights: REAL_HOD,
|
|
84
|
+
bornRecentBias: 0.2,
|
|
85
|
+
percentUsersBornInDataset: 25,
|
|
86
|
+
},
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* global — Distributed Users Across Timezones
|
|
90
|
+
* Very flat hourly + daily distribution. No cyclical patterns.
|
|
91
|
+
*/
|
|
92
|
+
global: {
|
|
93
|
+
peaks: (numDays) => Math.max(5, numDays * 2),
|
|
94
|
+
deviation: 1,
|
|
95
|
+
mean: 0,
|
|
96
|
+
dayOfWeekWeights: FLAT_DOW,
|
|
97
|
+
hourOfDayWeights: FLAT_HOD,
|
|
98
|
+
bornRecentBias: 0,
|
|
99
|
+
percentUsersBornInDataset: 10,
|
|
100
|
+
},
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* churny — High Churn / Declining Product
|
|
104
|
+
* Flat distribution (no growth trend). All users pre-exist the dataset,
|
|
105
|
+
* so there's no acceleration. Combine with an "everything" hook that
|
|
106
|
+
* filters late events to create a true declining shape.
|
|
107
|
+
*/
|
|
108
|
+
churny: {
|
|
109
|
+
peaks: (numDays) => Math.max(5, numDays * 2),
|
|
110
|
+
deviation: 2,
|
|
111
|
+
mean: 0,
|
|
112
|
+
dayOfWeekWeights: REAL_DOW,
|
|
113
|
+
hourOfDayWeights: REAL_HOD,
|
|
114
|
+
bornRecentBias: 0,
|
|
115
|
+
percentUsersBornInDataset: 5,
|
|
116
|
+
},
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* chaotic — Unpredictable / Irregular Patterns
|
|
120
|
+
* Few peaks + very tight clustering = dramatic bursts separated by quiet stretches.
|
|
121
|
+
*/
|
|
122
|
+
chaotic: {
|
|
123
|
+
peaks: (numDays) => Math.max(3, Math.ceil(numDays / 20)),
|
|
124
|
+
deviation: 4,
|
|
125
|
+
mean: 0,
|
|
126
|
+
dayOfWeekWeights: REAL_DOW,
|
|
127
|
+
hourOfDayWeights: REAL_HOD,
|
|
128
|
+
bornRecentBias: 0.5,
|
|
129
|
+
percentUsersBornInDataset: 40,
|
|
130
|
+
},
|
|
131
|
+
};
|
|
132
|
+
|
|
133
|
+
/** @type {string[]} */
|
|
134
|
+
export const PRESET_NAMES = Object.keys(SOUP_PRESETS);
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* Resolves a soup config — handles string presets, preset+overrides, and raw objects.
|
|
138
|
+
* @param {string | object} soup - Soup config from dungeon
|
|
139
|
+
* @param {number} numDays - Number of days in the dataset
|
|
140
|
+
* @returns {{ soup: object, suggestedBornRecentBias?: number, suggestedPercentUsersBornInDataset?: number }}
|
|
141
|
+
*/
|
|
142
|
+
export function resolveSoup(soup, numDays) {
|
|
143
|
+
if (!soup) return { soup: {} };
|
|
144
|
+
|
|
145
|
+
// String preset: "growth", "spiky", etc.
|
|
146
|
+
if (typeof soup === 'string') {
|
|
147
|
+
const preset = SOUP_PRESETS[soup];
|
|
148
|
+
if (!preset) {
|
|
149
|
+
throw new Error(`Unknown soup preset: "${soup}". Valid presets: ${PRESET_NAMES.join(', ')}`);
|
|
150
|
+
}
|
|
151
|
+
return {
|
|
152
|
+
soup: {
|
|
153
|
+
peaks: preset.peaks(numDays),
|
|
154
|
+
deviation: preset.deviation,
|
|
155
|
+
mean: preset.mean,
|
|
156
|
+
dayOfWeekWeights: preset.dayOfWeekWeights,
|
|
157
|
+
hourOfDayWeights: preset.hourOfDayWeights,
|
|
158
|
+
},
|
|
159
|
+
suggestedBornRecentBias: preset.bornRecentBias,
|
|
160
|
+
suggestedPercentUsersBornInDataset: preset.percentUsersBornInDataset,
|
|
161
|
+
};
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Object with preset key: { preset: "growth", deviation: 3 }
|
|
165
|
+
if (typeof soup === 'object' && soup.preset) {
|
|
166
|
+
const preset = SOUP_PRESETS[soup.preset];
|
|
167
|
+
if (!preset) {
|
|
168
|
+
throw new Error(`Unknown soup preset: "${soup.preset}". Valid presets: ${PRESET_NAMES.join(', ')}`);
|
|
169
|
+
}
|
|
170
|
+
const base = {
|
|
171
|
+
peaks: preset.peaks(numDays),
|
|
172
|
+
deviation: preset.deviation,
|
|
173
|
+
mean: preset.mean,
|
|
174
|
+
dayOfWeekWeights: preset.dayOfWeekWeights,
|
|
175
|
+
hourOfDayWeights: preset.hourOfDayWeights,
|
|
176
|
+
};
|
|
177
|
+
// Apply overrides (excluding the 'preset' key itself)
|
|
178
|
+
const { preset: _, ...overrides } = soup;
|
|
179
|
+
return {
|
|
180
|
+
soup: { ...base, ...overrides },
|
|
181
|
+
suggestedBornRecentBias: preset.bornRecentBias,
|
|
182
|
+
suggestedPercentUsersBornInDataset: preset.percentUsersBornInDataset,
|
|
183
|
+
};
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// Raw object: { peaks: 10, deviation: 2 } — pass through unchanged
|
|
187
|
+
return { soup };
|
|
188
|
+
}
|
package/lib/utils/utils.js
CHANGED
|
@@ -1180,7 +1180,17 @@ let soupHits = 0;
|
|
|
1180
1180
|
* Divides the range into `peaks` chunks, picks one randomly, then samples within it.
|
|
1181
1181
|
* Returns unix seconds (not ISO string) for performance — caller converts once.
|
|
1182
1182
|
*/
|
|
1183
|
-
|
|
1183
|
+
// Default day-of-week weights (0=Sun, 1=Mon, ..., 6=Sat) — derived from real Mixpanel data
|
|
1184
|
+
const DEFAULT_DOW_WEIGHTS = [0.637, 1.0, 0.999, 0.998, 0.966, 0.802, 0.528];
|
|
1185
|
+
|
|
1186
|
+
// Default hour-of-day weights (0=midnight, ..., 23=11pm UTC) — derived from real Mixpanel data
|
|
1187
|
+
const DEFAULT_HOD_WEIGHTS = [
|
|
1188
|
+
0.949, 0.992, 0.998, 0.946, 0.895, 0.938, 1.0, 0.997,
|
|
1189
|
+
0.938, 0.894, 0.827, 0.786, 0.726, 0.699, 0.688, 0.643,
|
|
1190
|
+
0.584, 0.574, 0.554, 0.576, 0.604, 0.655, 0.722, 0.816
|
|
1191
|
+
];
|
|
1192
|
+
|
|
1193
|
+
function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0, dayOfWeekWeights = DEFAULT_DOW_WEIGHTS, hourOfDayWeights = DEFAULT_HOD_WEIGHTS, timeShiftSeconds = 0) {
|
|
1184
1194
|
if (!earliestTime) earliestTime = global.FIXED_BEGIN ? global.FIXED_BEGIN : dayjs().subtract(30, 'd').unix();
|
|
1185
1195
|
if (!latestTime) latestTime = global.FIXED_NOW ? global.FIXED_NOW : dayjs().unix();
|
|
1186
1196
|
const chance = getChance();
|
|
@@ -1193,21 +1203,57 @@ function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0)
|
|
|
1193
1203
|
}
|
|
1194
1204
|
const chunkSize = totalRange / peaks;
|
|
1195
1205
|
|
|
1196
|
-
//
|
|
1206
|
+
// Phase 1: Gaussian chunk sampling (macro trend across the time range)
|
|
1197
1207
|
const peakIndex = integer(0, peaks - 1);
|
|
1198
1208
|
const chunkStart = earliestTime + peakIndex * chunkSize;
|
|
1199
1209
|
const chunkEnd = chunkStart + chunkSize;
|
|
1200
1210
|
const chunkMid = (chunkStart + chunkEnd) / 2;
|
|
1201
|
-
|
|
1202
|
-
// Generate offset from normal distribution, clamp to chunk boundaries
|
|
1203
1211
|
const maxDeviation = chunkSize / deviation;
|
|
1204
1212
|
const offset = chance.normal({ mean: mean, dev: maxDeviation });
|
|
1205
1213
|
const proposedTime = chunkMid + offset;
|
|
1206
1214
|
const clampedTime = Math.max(chunkStart, Math.min(chunkEnd, proposedTime));
|
|
1207
|
-
|
|
1215
|
+
let candidate = Math.max(earliestTime, Math.min(latestTime, clampedTime));
|
|
1216
|
+
|
|
1217
|
+
// Phase 2: DOW accept/reject — retry if day-of-week doesn't pass weight check
|
|
1218
|
+
if (dayOfWeekWeights) {
|
|
1219
|
+
for (let attempt = 0; attempt < 50; attempt++) {
|
|
1220
|
+
const dow = new Date((candidate + timeShiftSeconds) * 1000).getUTCDay();
|
|
1221
|
+
if (chance.random() < dayOfWeekWeights[dow]) break;
|
|
1222
|
+
// Rejected — resample from Gaussian chunks
|
|
1223
|
+
const pi = integer(0, peaks - 1);
|
|
1224
|
+
const cs = earliestTime + pi * chunkSize;
|
|
1225
|
+
const ce = cs + chunkSize;
|
|
1226
|
+
const cm = (cs + ce) / 2;
|
|
1227
|
+
const md = chunkSize / deviation;
|
|
1228
|
+
const off = chance.normal({ mean: mean, dev: md });
|
|
1229
|
+
const pt = cm + off;
|
|
1230
|
+
candidate = Math.max(earliestTime, Math.min(latestTime, Math.max(cs, Math.min(ce, pt))));
|
|
1231
|
+
}
|
|
1232
|
+
}
|
|
1233
|
+
|
|
1234
|
+
// Phase 3: Redistribute hour-of-day (changes only hour within same day)
|
|
1235
|
+
if (hourOfDayWeights) {
|
|
1236
|
+
const shifted = candidate + timeShiftSeconds;
|
|
1237
|
+
const d = new Date(shifted * 1000);
|
|
1238
|
+
const currentMinute = d.getUTCMinutes();
|
|
1239
|
+
const currentSecond = d.getUTCSeconds();
|
|
1240
|
+
|
|
1241
|
+
const totalHodWeight = hourOfDayWeights.reduce((s, w) => s + w, 0);
|
|
1242
|
+
let roll = chance.random() * totalHodWeight;
|
|
1243
|
+
let newHour = 0;
|
|
1244
|
+
for (let h = 0; h < 24; h++) {
|
|
1245
|
+
roll -= hourOfDayWeights[h];
|
|
1246
|
+
if (roll <= 0) { newHour = h; break; }
|
|
1247
|
+
}
|
|
1248
|
+
|
|
1249
|
+
const dayStartShifted = Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate()) / 1000;
|
|
1250
|
+
const newShifted = dayStartShifted + newHour * 3600 + currentMinute * 60 + currentSecond;
|
|
1251
|
+
candidate = newShifted - timeShiftSeconds;
|
|
1252
|
+
candidate = Math.max(earliestTime, Math.min(latestTime, candidate));
|
|
1253
|
+
}
|
|
1208
1254
|
|
|
1209
1255
|
soupHits++;
|
|
1210
|
-
return
|
|
1256
|
+
return candidate;
|
|
1211
1257
|
}
|
|
1212
1258
|
|
|
1213
1259
|
|
package/package.json
CHANGED
package/types.d.ts
CHANGED
|
@@ -133,17 +133,34 @@ export type SCDProp = {
|
|
|
133
133
|
};
|
|
134
134
|
|
|
135
135
|
/**
|
|
136
|
-
*
|
|
136
|
+
* Soup preset names for common time distribution patterns
|
|
137
137
|
*/
|
|
138
|
-
type
|
|
138
|
+
export type SoupPreset = "steady" | "growth" | "spiky" | "seasonal" | "global" | "churny" | "chaotic";
|
|
139
|
+
|
|
140
|
+
/**
|
|
141
|
+
* Soup configuration object for fine-grained control
|
|
142
|
+
*/
|
|
143
|
+
export type SoupConfig = {
|
|
144
|
+
/** Use a named preset as base, then override individual fields */
|
|
145
|
+
preset?: SoupPreset;
|
|
139
146
|
/** Controls clustering tightness. Higher = tighter peaks. Default: 2 */
|
|
140
147
|
deviation?: number;
|
|
141
|
-
/** Number of time clusters to distribute events across. Default:
|
|
148
|
+
/** Number of time clusters to distribute events across. Default: numDays*2 */
|
|
142
149
|
peaks?: number;
|
|
143
150
|
/** Offset for the normal distribution center within each peak. Default: 0 */
|
|
144
151
|
mean?: number;
|
|
152
|
+
/** Day-of-week weights (7 elements, index 0=Sunday). Normalized max=1.0. Set null to disable. */
|
|
153
|
+
dayOfWeekWeights?: number[] | null;
|
|
154
|
+
/** Hour-of-day weights (24 elements, index 0=midnight UTC). Normalized max=1.0. Set null to disable. */
|
|
155
|
+
hourOfDayWeights?: number[] | null;
|
|
145
156
|
};
|
|
146
157
|
|
|
158
|
+
/**
|
|
159
|
+
* the soup is a set of parameters that determine the distribution of events over time.
|
|
160
|
+
* Can be a preset name string, a config object, or a config object with a preset base.
|
|
161
|
+
*/
|
|
162
|
+
type soup = SoupPreset | SoupConfig;
|
|
163
|
+
|
|
147
164
|
/**
|
|
148
165
|
* Hook types and when they fire (in order per user):
|
|
149
166
|
* - "user" — user profile object (mutate in-place, return ignored)
|