make-mp-data 2.0.19 → 2.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,185 +21,216 @@ import { makeSCD } from "../generators/scd.js";
21
21
  * @returns {Promise<void>}
22
22
  */
23
23
  export async function userLoop(context) {
24
- const { config, storage, defaults } = context;
25
- const chance = u.getChance();
26
- const concurrency = config?.concurrency || Math.min(os.cpus().length * 2, 16);
27
- const USER_CONN = pLimit(concurrency);
28
-
29
- const {
30
- verbose,
31
- numUsers,
32
- numEvents,
33
- isAnonymous,
34
- hasAvatar,
35
- hasAnonIds,
36
- hasSessionIds,
37
- hasLocation,
38
- funnels,
39
- userProps,
40
- scdProps,
41
- numDays,
42
- percentUsersBornInDataset = 5,
43
- } = config;
44
-
45
- const { eventData, userProfilesData, scdTableData } = storage;
46
- const avgEvPerUser = numEvents / numUsers;
47
- const startTime = Date.now();
48
-
49
- // Create batches for parallel processing
50
- const batchSize = Math.max(1, Math.ceil(numUsers / concurrency));
51
- const userPromises = [];
52
-
53
- for (let i = 0; i < numUsers; i++) {
54
- const userPromise = USER_CONN(async () => {
55
- context.incrementUserCount();
56
- const eps = Math.floor(context.getEventCount() / ((Date.now() - startTime) / 1000));
57
-
58
- if (verbose) {
59
- u.progress([
60
- ["users", context.getUserCount()],
61
- ["events", context.getEventCount()],
62
- ["eps", eps]
63
- ]);
64
- }
65
-
66
- const userId = chance.guid();
67
- const user = u.generateUser(userId, { numDays, isAnonymous, hasAvatar, hasAnonIds, hasSessionIds });
68
- const { distinct_id, created } = user;
69
- const userIsBornInDataset = chance.bool({ likelihood: percentUsersBornInDataset });
70
- let numEventsPreformed = 0;
71
-
72
- if (!userIsBornInDataset) delete user.created;
73
-
74
- // Calculate time adjustments
75
- const daysShift = context.getDaysShift();
76
- const adjustedCreated = userIsBornInDataset
77
- ? dayjs(created).subtract(daysShift, 'd')
78
- : dayjs.unix(global.FIXED_BEGIN);
79
-
80
- if (hasLocation) {
81
- const location = u.pickRandom(u.choose(defaults.locationsUsers));
82
- for (const key in location) {
83
- user[key] = location[key];
84
- }
85
- }
86
-
87
- // Profile creation
88
- const profile = await makeUserProfile(context, userProps, user);
89
-
90
- // SCD creation
91
- // @ts-ignore
92
- const scdUserTables = t.objFilter(scdProps, (scd) => scd.type === 'user' || !scd.type);
93
- const scdTableKeys = Object.keys(scdUserTables);
94
-
95
- const userSCD = {};
96
- for (const [index, key] of scdTableKeys.entries()) {
97
- const { max = 100 } = scdProps[key];
98
- const mutations = chance.integer({ min: 1, max });
99
- const changes = await makeSCD(context, scdProps[key], key, distinct_id, mutations, created);
100
- userSCD[key] = changes;
101
-
102
- await config.hook(changes, "scd-pre", {
103
- profile,
104
- type: 'user',
105
- scd: { [key]: scdProps[key] },
106
- config,
107
- allSCDs: userSCD
108
- });
109
- }
110
-
111
- let numEventsThisUserWillPreform = Math.floor(chance.normal({
112
- mean: avgEvPerUser,
113
- dev: avgEvPerUser / u.integer(u.integer(2, 5), u.integer(2, 7))
114
- }) * 0.714159265359);
115
-
116
- // Power users and low-activity users logic
117
- chance.bool({ likelihood: 20 }) ? numEventsThisUserWillPreform *= 5 : null;
118
- chance.bool({ likelihood: 15 }) ? numEventsThisUserWillPreform *= 0.333 : null;
119
- numEventsThisUserWillPreform = Math.round(numEventsThisUserWillPreform);
120
-
121
- let userFirstEventTime;
122
-
123
- const firstFunnels = funnels.filter((f) => f.isFirstFunnel).reduce(u.weighFunnels, []);
124
- const usageFunnels = funnels.filter((f) => !f.isFirstFunnel).reduce(u.weighFunnels, []);
125
-
126
- const secondsInDay = 86400;
127
- const noise = () => chance.integer({ min: 0, max: secondsInDay });
128
- let usersEvents = [];
129
-
130
- if (firstFunnels.length && userIsBornInDataset) {
131
- const firstFunnel = chance.pickone(firstFunnels, user);
132
- const firstTime = adjustedCreated.subtract(noise(), 'seconds').unix();
133
- const [data, userConverted] = await makeFunnel(context, firstFunnel, user, firstTime, profile, userSCD);
134
-
135
- const timeShift = context.getTimeShift();
136
- userFirstEventTime = dayjs(data[0].time).subtract(timeShift, 'seconds').unix();
137
- numEventsPreformed += data.length;
138
- usersEvents = usersEvents.concat(data);
139
-
140
- if (!userConverted) {
141
- // if (verbose) {
142
- // u.progress([["users", context.getUserCount()], ["events", context.getEventCount()]]);
143
- // }
144
- return;
145
- }
146
- } else {
147
- userFirstEventTime = adjustedCreated.subtract(noise(), 'seconds').unix();
148
- }
149
-
150
- while (numEventsPreformed < numEventsThisUserWillPreform) {
151
- if (usageFunnels.length) {
152
- const currentFunnel = chance.pickone(usageFunnels);
153
- const [data, userConverted] = await makeFunnel(context, currentFunnel, user, userFirstEventTime, profile, userSCD);
154
- numEventsPreformed += data.length;
155
- usersEvents = usersEvents.concat(data);
156
- } else {
157
- const data = await makeEvent(context, distinct_id, userFirstEventTime, u.pick(config.events), user.anonymousIds, user.sessionIds, {}, config.groupKeys, true);
158
- numEventsPreformed++;
159
- usersEvents = usersEvents.concat(data);
160
- }
161
- }
162
-
163
- // Hook for processing all user events
164
- if (config.hook) {
165
- const newEvents = await config.hook(usersEvents, "everything", {
166
- profile,
167
- scd: userSCD,
168
- config,
169
- userIsBornInDataset
170
- });
171
- if (Array.isArray(newEvents)) usersEvents = newEvents;
172
- }
173
-
174
- // Store all user data
175
- await userProfilesData.hookPush(profile);
176
-
177
- if (Object.keys(userSCD).length) {
178
- for (const [key, changesArray] of Object.entries(userSCD)) {
179
- for (const changes of changesArray) {
180
- try {
181
- const target = scdTableData.filter(arr => arr.scdKey === key).pop();
182
- await target.hookPush(changes, { profile, type: 'user' });
183
- }
184
- catch (e) {
185
- // This is probably a test
186
- const target = scdTableData[0];
187
- await target.hookPush(changes, { profile, type: 'user' });
188
- }
189
- }
190
- }
191
- }
192
-
193
- await eventData.hookPush(usersEvents, { profile });
194
-
195
- if (verbose) {
196
- // u.progress([["users", context.getUserCount()], ["events", context.getEventCount()]]);
197
- }
198
- });
199
-
200
- userPromises.push(userPromise);
201
- }
202
-
203
- // Wait for all users to complete
204
- await Promise.all(userPromises);
24
+ const { config, storage, defaults } = context;
25
+ const chance = u.getChance();
26
+ const concurrency = config?.concurrency || Math.min(os.cpus().length * 2, 16);
27
+ const USER_CONN = pLimit(concurrency);
28
+
29
+ const {
30
+ verbose,
31
+ numUsers,
32
+ numEvents,
33
+ isAnonymous,
34
+ hasAvatar,
35
+ hasAnonIds,
36
+ hasSessionIds,
37
+ hasLocation,
38
+ funnels,
39
+ userProps,
40
+ scdProps,
41
+ numDays,
42
+ percentUsersBornInDataset = 5,
43
+ } = config;
44
+
45
+ const { eventData, userProfilesData, scdTableData } = storage;
46
+ const avgEvPerUser = numEvents / numUsers;
47
+ const startTime = Date.now();
48
+
49
+ // Queue one task per user; USER_CONN caps how many run at once. NOTE: batchSize below is computed but never used.
50
+ const batchSize = Math.max(1, Math.ceil(numUsers / concurrency));
51
+ const userPromises = [];
52
+
53
+ for (let i = 0; i < numUsers; i++) {
54
+ const userPromise = USER_CONN(async () => {
55
+ context.incrementUserCount();
56
+ const eps = Math.floor(context.getEventCount() / ((Date.now() - startTime) / 1000));
57
+
58
+ if (verbose) {
59
+ u.progress([
60
+ ["users", context.getUserCount()],
61
+ ["events", context.getEventCount()],
62
+ ["eps", eps]
63
+ ]);
64
+ }
65
+
66
+ const userId = chance.guid();
67
+ const user = u.generateUser(userId, { numDays, isAnonymous, hasAvatar, hasAnonIds, hasSessionIds });
68
+ const { distinct_id, created } = user;
69
+ const userIsBornInDataset = chance.bool({ likelihood: percentUsersBornInDataset });
70
+ let numEventsPreformed = 0;
71
+
72
+ if (!userIsBornInDataset) delete user.created;
73
+
74
+ // Calculate time adjustments
75
+ const daysShift = context.getDaysShift();
76
+ const adjustedCreated = userIsBornInDataset
77
+ ? dayjs(created).subtract(daysShift, 'd')
78
+ : dayjs.unix(global.FIXED_BEGIN);
79
+
80
+ if (hasLocation) {
81
+ const location = u.pickRandom(u.choose(defaults.locationsUsers));
82
+ for (const key in location) {
83
+ user[key] = location[key];
84
+ }
85
+ }
86
+
87
+ // Profile creation
88
+ const profile = await makeUserProfile(context, userProps, user);
89
+
90
+ // Call user hook after profile creation
91
+ if (config.hook) {
92
+ await config.hook(profile, "user", {
93
+ user,
94
+ config,
95
+ userIsBornInDataset
96
+ });
97
+ }
98
+
99
+ // SCD creation
100
+ // @ts-ignore
101
+ const scdUserTables = t.objFilter(scdProps, (scd) => scd.type === 'user' || !scd.type);
102
+ const scdTableKeys = Object.keys(scdUserTables);
103
+
104
+ const userSCD = {};
105
+ for (const [index, key] of scdTableKeys.entries()) {
106
+ const { max = 100 } = scdProps[key];
107
+ const mutations = chance.integer({ min: 1, max });
108
+ const changes = await makeSCD(context, scdProps[key], key, distinct_id, mutations, created);
109
+ userSCD[key] = changes;
110
+
111
+ await config.hook(changes, "scd-pre", {
112
+ profile,
113
+ type: 'user',
114
+ scd: { [key]: scdProps[key] },
115
+ config,
116
+ allSCDs: userSCD
117
+ });
118
+ }
119
+
120
+ let numEventsThisUserWillPreform = Math.floor(chance.normal({
121
+ mean: avgEvPerUser,
122
+ dev: avgEvPerUser / u.integer(u.integer(2, 5), u.integer(2, 7))
123
+ }) * 0.714159265359);
124
+
125
+ // Power users and low-activity users logic
126
+ chance.bool({ likelihood: 20 }) ? numEventsThisUserWillPreform *= 5 : null;
127
+ chance.bool({ likelihood: 15 }) ? numEventsThisUserWillPreform *= 0.333 : null;
128
+ numEventsThisUserWillPreform = Math.round(numEventsThisUserWillPreform);
129
+
130
+ let userFirstEventTime;
131
+
132
+ const firstFunnels = funnels.filter((f) => f.isFirstFunnel)
133
+ .filter((f) => !f.conditions || matchConditions(profile, f.conditions))
134
+ .reduce(weighFunnels, []);
135
+ const usageFunnels = funnels.filter((f) => !f.isFirstFunnel)
136
+ .filter((f) => !f.conditions || matchConditions(profile, f.conditions))
137
+ .reduce(weighFunnels, []);
138
+
139
+ const secondsInDay = 86400;
140
+ const noise = () => chance.integer({ min: 0, max: secondsInDay });
141
+ let usersEvents = [];
142
+
143
+ // PATH FOR USERS BORN IN DATASET AND PERFORMING FIRST FUNNEL
144
+ if (firstFunnels.length && userIsBornInDataset) {
145
+ const firstFunnel = chance.pickone(firstFunnels, user);
146
+ const firstTime = adjustedCreated.subtract(noise(), 'seconds').unix();
147
+ const [data, userConverted] = await makeFunnel(context, firstFunnel, user, firstTime, profile, userSCD);
148
+
149
+ const timeShift = context.getTimeShift();
150
+ userFirstEventTime = dayjs(data[0].time).subtract(timeShift, 'seconds').unix();
151
+ numEventsPreformed += data.length;
152
+ usersEvents = usersEvents.concat(data);
153
+
154
+ if (!userConverted) {
155
+ // if (verbose) {
156
+ // u.progress([["users", context.getUserCount()], ["events", context.getEventCount()]]);
157
+ // }
158
+ return;
159
+ }
160
+ } else {
161
+ userFirstEventTime = adjustedCreated.subtract(noise(), 'seconds').unix();
162
+ }
163
+
164
+ // ALL SUBSEQUENT FUNNELS
165
+ while (numEventsPreformed < numEventsThisUserWillPreform) {
166
+ if (usageFunnels.length) {
167
+ const currentFunnel = chance.pickone(usageFunnels);
168
+ const [data, userConverted] = await makeFunnel(context, currentFunnel, user, userFirstEventTime, profile, userSCD);
169
+ numEventsPreformed += data.length;
170
+ usersEvents = usersEvents.concat(data);
171
+ } else {
172
+ const data = await makeEvent(context, distinct_id, userFirstEventTime, u.pick(config.events), user.anonymousIds, user.sessionIds, {}, config.groupKeys, true);
173
+ numEventsPreformed++;
174
+ usersEvents = usersEvents.concat(data);
175
+ }
176
+ }
177
+
178
+ // Hook for processing all user events
179
+ if (config.hook) {
180
+ const newEvents = await config.hook(usersEvents, "everything", {
181
+ profile,
182
+ scd: userSCD,
183
+ config,
184
+ userIsBornInDataset
185
+ });
186
+ if (Array.isArray(newEvents)) usersEvents = newEvents;
187
+ }
188
+
189
+ // Store all user data
190
+ await userProfilesData.hookPush(profile);
191
+
192
+ if (Object.keys(userSCD).length) {
193
+ for (const [key, changesArray] of Object.entries(userSCD)) {
194
+ for (const changes of changesArray) {
195
+ try {
196
+ const target = scdTableData.filter(arr => arr.scdKey === key).pop();
197
+ await target.hookPush(changes, { profile, type: 'user' });
198
+ }
199
+ catch (e) {
200
+ // No table matched this key or the push threw (probably a test); fall back to the first SCD table
201
+ const target = scdTableData[0];
202
+ await target.hookPush(changes, { profile, type: 'user' });
203
+ }
204
+ }
205
+ }
206
+ }
207
+
208
+ await eventData.hookPush(usersEvents, { profile });
209
+
210
+ if (verbose) {
211
+ // u.progress([["users", context.getUserCount()], ["events", context.getEventCount()]]);
212
+ }
213
+ });
214
+
215
+ userPromises.push(userPromise);
216
+ }
217
+
218
+ // Wait for all users to complete
219
+ await Promise.all(userPromises);
220
+ }
221
+
222
+
223
+ export function weighFunnels(acc, funnel) {
224
+ const weight = funnel?.weight || 1;
225
+ for (let i = 0; i < weight; i++) {
226
+ acc.push(funnel);
227
+ }
228
+ return acc;
229
+ }
230
+
231
+ export function matchConditions(profile, conditions) {
232
+ for (const [key, value] of Object.entries(conditions)) {
233
+ if (profile[key] !== value) return false;
234
+ }
235
+ return true;
205
236
  }
@@ -3,6 +3,9 @@
3
3
  * Handles distributed processing across multiple cloud function workers
4
4
  */
5
5
 
6
+ /** @typedef {import('../../types.js').Context} Context */
7
+ /** @typedef {import('../../types.js').Dungeon} Dungeon */
8
+
6
9
  import pLimit from 'p-limit';
7
10
  import { GoogleAuth } from 'google-auth-library';
8
11
  import { timer, uid, sLog } from 'ak-tools';
@@ -138,7 +141,7 @@ export async function handleCloudFunctionEntry(req, res, mainFunction) {
138
141
  try {
139
142
  if (!script) throw new Error("no script");
140
143
 
141
- /** @type {Config} */
144
+ /** @type {Dungeon} */
142
145
  const config = eval(script);
143
146
 
144
147
  if (isReplica) {
@@ -148,7 +151,7 @@ export async function handleCloudFunctionEntry(req, res, mainFunction) {
148
151
  params.seed = newSeed;
149
152
  }
150
153
 
151
- /** @type {Config} */
154
+ /** @type {Dungeon} */
152
155
  const optionsYouCantChange = {
153
156
  verbose: false
154
157
  };
@@ -0,0 +1,159 @@
1
+ /**
2
+ * A "ValueValid" can be a primitive, an array of primitives,
3
+ * or a function that returns a primitive or an array of primitives.
4
+ * This is the building block for all property values in the dungeon.
5
+ */
6
+ type Primitives = string | number | boolean | Date;
7
+ type ValueValid = Primitives | Primitives[] | (() => Primitives | Primitives[]);
8
+
9
+
10
+ /**
11
+ * The main configuration object for the entire data generation spec, known as a "Dungeon".
12
+ * This is the high-level object you will be constructing.
13
+ */
14
+ export interface Dungeon {
15
+ /** A list of all possible events that can occur in the simulation. */
16
+ events?: EventConfig[];
17
+
18
+ /** A list of event sequences that represent user journeys (e.g., sign-up, purchase). */
19
+ funnels?: Funnel[];
20
+
21
+ /** Properties that are attached to every event for all users. */
22
+ superProps?: Record<string, ValueValid>;
23
+
24
+ /** Properties that define the characteristics of individual users. */
25
+ userProps?: Record<string, ValueValid>;
26
+
27
+ /** Properties that change for users or groups over time (Slowly Changing Dimensions). */
28
+ scdProps?: Record<string, SCDProp>;
29
+
30
+ /** Defines group entities, like companies or teams, and how many of each to create. */
31
+ groupKeys?: [string, number, string[]?][]; // [key, numGroups, optional_events_for_group]
32
+
33
+ /** Properties that define the characteristics of the groups defined in groupKeys. */
34
+ groupProps?: Record<string, Record<string, ValueValid>>;
35
+
36
+ /** Events that are attributed to a group entity rather than an individual user. */
37
+ groupEvents?: GroupEventConfig[];
38
+
39
+ /** Static data tables (e.g., product catalogs) that can be referenced in events. */
40
+ lookupTables?: LookupTableSchema[];
41
+
42
+ }
43
+
44
+
45
+ /**
46
+ * Defines a single event, its properties, and its likelihood of occurring.
47
+ */
48
+ interface EventConfig {
49
+ /** The name of the event (e.g., "Page View", "Add to Cart"). */
50
+ event?: string;
51
+
52
+ /** The relative frequency of this event compared to others. Higher numbers are more frequent. */
53
+ weight?: number;
54
+
55
+ /** A dictionary of properties associated with this event. */
56
+ properties?: Record<string, ValueValid>;
57
+
58
+ /** If true, this event will be the first event for a new user. */
59
+ isFirstEvent?: boolean;
60
+
61
+ /** If true, this event signifies that a user has churned. */
62
+ isChurnEvent?: boolean;
63
+ }
64
+
65
+
66
+ /**
67
+ * Defines a sequence of events that represents a meaningful user journey or workflow.
68
+ */
69
+ interface Funnel {
70
+ /** The name of the funnel (e.g., "Purchase Funnel"). */
71
+ name?: string;
72
+
73
+ /** A list of event names that make up the sequence of this funnel. */
74
+ sequence: string[];
75
+
76
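+ /** The relative likelihood that a user will attempt this funnel: the funnel is entered into the pick list `weight` times (a missing or zero weight counts as 1). */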
77
+ weight?: number;
78
+
79
+ /** If true, this funnel is part of the initial user experience (e.g., onboarding). */
80
+ isFirstFunnel?: boolean;
81
+
82
+ /** The probability (0-100) that a user who starts the funnel will complete it. */
83
+ conversionRate?: number;
84
+
85
+ /** The average time (in hours) it takes a user to complete the funnel. */
86
+ timeToConvert?: number;
87
+
88
+ /** Defines the ordering of events within the funnel for a user.
89
+ * "sequential": Events must happen in the exact order defined in `sequence`.
90
+ * "random": Events can happen in any order.
91
+ * "first-and-last-fixed": The first and last events are fixed, but the middle ones are random. The type also accepts "first-fixed", "last-fixed", and "interrupted", which are not described here.
92
+ */
93
+ order?: "sequential" | "random" | "first-and-last-fixed" | "first-fixed" | "last-fixed" | "interrupted";
94
+
95
+ /** Properties that will be attached to every event generated within this funnel journey. */
96
+ props?: Record<string, ValueValid>;
97
+
98
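+ /** User property conditions that determine which users are eligible for this funnel. A user is eligible only if, for every key listed here, the user's profile value is strictly equal (===) to the given value; array or function values are therefore compared by reference, not by content. */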
99
+ conditions?: Record<string, ValueValid>;
100
+ }
101
+
102
+
103
+ /**
104
+ * Defines a "Slowly Changing Dimension" — a property of a user or group
105
+ * that changes periodically over time (e.g., subscription plan, user role).
106
+ */
107
+ interface SCDProp {
108
+ /** The entity this property belongs to ('user' or a group key like 'company_id'). */
109
+ type?: string;
110
+
111
+ /** How often the value of this property can change. */
112
+ frequency: "day" | "week" | "month" | "year";
113
+
114
+ /** A list or function defining the possible values for this property. */
115
+ values: ValueValid;
116
+
117
+ /** 'fixed': Changes occur exactly on the frequency interval.
118
+ * 'fuzzy': Changes occur randomly around the frequency interval.
119
+ */
120
+ timing: "fixed" | "fuzzy";
121
+
122
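+ /** The maximum number of times this property can change for a single entity. For user SCDs the actual count is a random integer from 1 to `max`; `max` defaults to 100 when omitted. */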
123
+ max?: number;
124
+ }
125
+
126
+
127
+ /**
128
+ * Defines an event that is attributed to a group and occurs on a regular schedule.
129
+ * (e.g., a monthly subscription charge for a company).
130
+ */
131
+ interface GroupEventConfig extends EventConfig {
132
+ /** How often the event occurs (in days). */
133
+ frequency: number;
134
+
135
+ /** The group key this event is associated with (e.g., "company_id"). */
136
+ group_key: string;
137
+
138
+ /** If true, a random user within the group is also associated with the event. */
139
+ attribute_to_user: boolean;
140
+
141
+ /** The number of entities in this group. */
142
+ group_size: number;
143
+ }
144
+
145
+
146
+ /**
147
+ * Defines the schema for a static lookup table, which can be used to enrich event data.
148
+ * For example, a "products" table could hold details about product IDs.
149
+ */
150
+ interface LookupTableSchema {
151
+ /** The name of the key that will be used to join this table in an event (e.g., "product_id"). */
152
+ key: string;
153
+
154
+ /** The number of unique rows to generate for this table. */
155
+ entries: number;
156
+
157
+ /** A dictionary of attributes (columns) for the table and their possible values. */
158
+ attributes: Record<string, ValueValid>;
159
+ }
@@ -4,8 +4,8 @@
4
4
  * @fileoverview Contains default values for campaigns, devices, locations, and domains
5
5
  */
6
6
 
7
- /** @typedef {import('../../types.d.ts').main.Dungeon} Config */
8
- /** @typedef {import('../../types.d.ts').main.ValueValid} ValueValid */
7
+ /** @typedef {import('../../types.js').Dungeon} Config */
8
+ /** @typedef {import('../../types.js').ValueValid} ValueValid */
9
9
 
10
10
  //? https://docs.mixpanel.com/docs/data-structure/property-reference#default-properties
11
11