make-mp-data 1.4.5 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js ADDED
@@ -0,0 +1,1427 @@
1
+ #! /usr/bin/env node
2
+
3
+ /*
4
+ make fake mixpanel data easily!
5
+ by AK
6
+ ak@mixpanel.com
7
+ */
8
+
9
+ //todo: churn implementation
10
+ //todo: regular interval events (like 'card charged')
11
+ //todo: SCDs send to mixpanel
12
+ //todo: decent 'new dungeon' workflow
13
+
14
+
15
+ //TIME
16
+ const dayjs = require("dayjs");
17
+ const utc = require("dayjs/plugin/utc");
18
+ dayjs.extend(utc);
19
+ const FIXED_NOW = dayjs('2024-02-02').unix();
20
+ global.FIXED_NOW = FIXED_NOW;
21
+ // ^ this creates a FIXED POINT in time; we will shift it later
22
+ let FIXED_BEGIN = dayjs.unix(FIXED_NOW).subtract(90, 'd').unix();
23
+ const actualNow = dayjs();
24
+ const timeShift = actualNow.diff(dayjs.unix(FIXED_NOW), "seconds");
25
+ const daysShift = actualNow.diff(dayjs.unix(FIXED_NOW), "days");
26
+
27
+ // UTILS
28
+ const { existsSync } = require("fs");
29
+ const pLimit = require('p-limit');
30
+ const os = require("os");
31
+ const path = require("path");
32
+ const { comma, bytesHuman, makeName, md5, clone, tracker, uid, timer, ls, rm } = require("ak-tools");
33
+ const jobTimer = timer('job');
34
+ const { generateLineChart } = require('./components/chart.js');
35
+ const { version } = require('./package.json');
36
+ const mp = require("mixpanel-import");
37
+ const u = require("./components/utils.js");
38
+ const getCliParams = require("./components/cli.js");
39
+ const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41", os.userInfo().username);
40
+
41
+ // DEFAULTS
42
+ const { campaigns, devices, locations } = require('./components/defaults.js');
43
+ let CAMPAIGNS;
44
+ let DEFAULTS;
45
+ /** @type {Storage} */
46
+ let STORAGE;
47
+ /** @type {Config} */
48
+ let CONFIG;
49
+ require('dotenv').config();
50
+
51
+
52
+ // RUN STATE
53
+ let VERBOSE = false;
54
+ let isCLI = false;
55
+ // if we are running in batch mode, we MUST write to disk before we can send to mixpanel
56
+ let isBATCH_MODE = false;
57
+ let BATCH_SIZE = 500_000;
58
+
59
+ //todo: these should be moved into the hookedArrays
60
+ let operations = 0;
61
+ let eventCount = 0;
62
+ let userCount = 0;
63
+
64
+
65
+
/**
 * Generates a fake Mixpanel dataset from a dungeon config.
 *
 * In order: seeds the random number generator, validates the config, creates
 * one hooked storage array per output table, runs the user simulation, then
 * builds ad spend, group profiles, lookup tables and the mirror table.
 * Optionally draws a chart, flushes tables to disk and imports to Mixpanel.
 *
 * @param {Config} config dungeon config; `config.seed` is overwritten with the seed actually used
 * @returns {Promise<Result>} the storage tables plus a `time` report; when writing
 * to disk or importing, `importResults` and `files` keys are included as well
 */
async function main(config) {
	jobTimer.start();

	// same seed = same data; the SEED env var takes precedence over the config
	const seedWord = process.env.SEED || config.seed || "hello friend!";
	config.seed = seedWord;
	u.initChance(seedWord);
	validateDungeonConfig(config);
	global.FIXED_BEGIN = dayjs.unix(FIXED_NOW).subtract(config.numDays, 'd').unix();

	// GLOBALS
	CONFIG = config;
	VERBOSE = config.verbose;
	CAMPAIGNS = campaigns;
	// fresh copy of the default locations with one field removed from every entry
	const locationsWithout = (field) => clone(locations).map((place) => {
		delete place[field];
		return place;
	});
	DEFAULTS = {
		locationsUsers: u.pickAWinner(locationsWithout("country"), 0),
		locationsEvents: u.pickAWinner(locationsWithout("country_code"), 0),
		iOSDevices: u.pickAWinner(devices.iosDevices, 0),
		androidDevices: u.pickAWinner(devices.androidDevices, 0),
		desktopDevices: u.pickAWinner(devices.desktopDevices, 0),
		browsers: u.pickAWinner(devices.browsers, 0),
		campaigns: u.pickAWinner(campaigns, 0),
	};

	// TRACKING
	// everything named here is kept OUT of the tracking payload; the rest goes in
	const { events, superProps, userProps, scdProps, groupKeys, groupProps, lookupTables, soup, hook, mirrorProps, ...trackingParams } = config;
	trackingParams.runId = uid(42);
	trackingParams.version = version;
	delete trackingParams.funnels;

	// STORAGE
	const { simulationName, format } = config;
	const createTable = (type, suffix, extra = {}) =>
		makeHookArray([], { hook, type, config, format, ...extra, filepath: `${simulationName}-${suffix}` });

	const eventData = await createTable("event", "EVENTS");
	const userProfilesData = await createTable("user", "USERS");
	const adSpendData = await createTable("ad-spend", "AD-SPEND");

	// SCDs, groups and lookups may each have several tables
	const scdTableData = await Promise.all(
		Object.keys(scdProps).map((key) => createTable("scd", `SCD-${key}`, { scdKey: key }))
	);
	const groupProfilesData = await Promise.all(
		Object.keys(groupKeys).map((_, index) => {
			const groupKey = groupKeys[index]?.slice()?.shift();
			return createTable("group", `GROUPS-${groupKey}`, { groupKey });
		})
	);
	const lookupTableData = await Promise.all(
		Object.keys(lookupTables).map((_, index) => {
			const lookupKey = lookupTables[index].key;
			return createTable("lookup", `LOOKUP-${lookupKey}`, { lookupKey });
		})
	);
	const mirrorEventData = await createTable("mirror", "MIRROR");

	STORAGE = { eventData, userProfilesData, scdTableData, groupProfilesData, lookupTableData, mirrorEventData, adSpendData };

	track('start simulation', trackingParams);
	log(`------------------SETUP------------------`);
	log(`\nyour data simulation will heretofore be known as: \n\n\t${config.simulationName.toUpperCase()}...\n`);
	log(`and your configuration is:\n\n`, JSON.stringify(trackingParams, null, 2));
	log(`------------------SETUP------------------`, "\n");

	// USERS
	log(`---------------SIMULATION----------------`, "\n\n");
	const { concurrency = 1 } = config;
	await userLoop(config, STORAGE, concurrency);

	// AD SPEND
	const { hasAdSpend, epochStart, epochEnd } = config;
	if (hasAdSpend) {
		for (const day of u.datesBetween(epochStart, epochEnd)) {
			for (const spendEvent of await makeAdSpend(day)) {
				await adSpendData.hookPush(spendEvent);
			}
		}
	}
	log("\n");

	// GROUP PROFILES
	for (const [tableIndex, groupPair] of groupKeys.entries()) {
		const groupKey = groupPair[0];
		const groupCardinality = groupPair[1];
		// group ids run from 1 up to the cardinality
		for (let id = 1; id < groupCardinality + 1; id++) {
			if (VERBOSE) u.progress([["groups", id]]);
			const props = await makeProfile(groupProps[groupKey]);
			await groupProfilesData[tableIndex].hookPush({
				[groupKey]: id,
				...props,
				distinct_id: id.toString(),
			});
		}
	}
	log("\n");

	// LOOKUP TABLES
	for (const [tableIndex, lookupTable] of lookupTables.entries()) {
		const { key, entries, attributes } = lookupTable;
		for (let id = 1; id < entries + 1; id++) {
			if (VERBOSE) u.progress([["lookups", id]]);
			const props = await makeProfile(attributes);
			await lookupTableData[tableIndex].hookPush({ [key]: id, ...props });
		}
	}
	log("\n");

	// MIRROR
	if (Object.keys(mirrorProps).length) await makeMirror(config, STORAGE);

	log("\n");
	log(`---------------SIMULATION----------------`, "\n");

	// CHARTS
	const { makeChart } = config;
	if (makeChart) {
		const bornEvents = config.events?.filter((e) => e?.isFirstEvent)?.map((e) => e.event) || [];
		const bornFunnels = config.funnels?.filter((f) => f.isFirstFunnel)?.map((f) => f.sequence[0]) || [];
		await generateLineChart(eventData, [...bornEvents, ...bornFunnels], makeChart);
	}

	// nothing to write or send: return the in-memory tables right away
	const { writeToDisk, token } = config;
	if (!writeToDisk && !token) {
		jobTimer.stop(false);
		const { start, end, delta, human } = jobTimer.report(false);
		// the cast through `any` is only there to satisfy the jsdoc type checker
		const untyped = /** @type {any} */ (STORAGE);
		untyped.time = { start, end, delta, human };
		return /** @type {Result} */ (untyped);
	}

	log(`-----------------WRITES------------------`, `\n\n`);

	// never assigned in this function; kept so the shape of the result is unchanged
	let files;
	if (writeToDisk) {
		for (const table of Object.values(STORAGE)) {
			// a single non-empty table that knows how to flush itself
			if (table.length && typeof table.flush === "function") {
				await table.flush();
				continue;
			}
			// a list of tables (SCDs, groups, lookups)
			if (Array.isArray(table) && typeof table[0]?.flush === "function") {
				for (const subTable of table) {
					await subTable.flush();
				}
			}
		}
	}

	let importResults;
	if (token) importResults = await sendToMixpanel(config, STORAGE);

	log(`\n-----------------WRITES------------------`, "\n");
	track('end simulation', trackingParams);
	jobTimer.stop(false);
	const { start, end, delta, human } = jobTimer.report(false);

	return {
		...STORAGE,
		importResults,
		files,
		time: { start, end, delta, human },
	};
}
256
+
257
+
258
+
259
+
260
+ /*
261
+ ------
262
+ MODELS
263
+ ------
264
+ */
265
+
/**
 * Builds a single flat Mixpanel event for a user.
 *
 * The event gets (in this key order) its name, source, time and insert_id,
 * then optional device/session/user ids, the chosen custom and super
 * properties, any enabled default properties (location, browser, campaign,
 * device) and finally group keys. `insert_id` is the md5 of the serialized
 * event BEFORE the time is shifted forward by the module-level `timeShift`.
 *
 * @param {string} distinct_id
 * @param {number} earliestTime unix seconds; used verbatim for first events, as the lower bound otherwise
 * @param {EventConfig} chosenEvent
 * @param {string[]} [anonymousIds]
 * @param {string[]} [sessionIds]
 * @param {Object} [superProps]
 * @param {Object} [groupKeys]
 * @param {Boolean} [isFirstEvent]
 * @return {Promise<EventSchema>}
 * @throws {Error} when distinct_id, earliestTime or chosenEvent is missing
 */
async function makeEvent(distinct_id, earliestTime, chosenEvent, anonymousIds, sessionIds, superProps, groupKeys, isFirstEvent) {
	operations++;
	eventCount++;

	// required arguments throw, optional ones fall back to empty values
	if (!distinct_id) throw new Error("no distinct_id");
	anonymousIds = anonymousIds || [];
	sessionIds = sessionIds || [];
	if (!earliestTime) throw new Error("no earliestTime");
	if (!chosenEvent) throw new Error("no chosenEvent");
	superProps = superProps || {};
	groupKeys = groupKeys || [];
	isFirstEvent = isFirstEvent || false;

	const chance = u.getChance();
	const soupSettings = CONFIG?.soup || {};
	const { mean = 0, deviation = 2, peaks = 5 } = soupSettings;
	const featureFlags = CONFIG || {};
	const {
		hasAndroidDevices = false,
		hasBrowser = false,
		hasCampaigns = false,
		hasDesktopDevices = false,
		hasIOSDevices = false,
		hasLocation = false
	} = featureFlags;

	// event model; key order matters because insert_id hashes the serialized object
	const draft = {
		event: chosenEvent.event,
		source: "dm4",
		time: "",
		insert_id: "",
	};

	// gather the enabled default property pools
	const defaultProps = {};
	const devicePool = [];
	if (hasLocation) defaultProps.location = DEFAULTS.locationsEvents();
	if (hasBrowser) defaultProps.browser = DEFAULTS.browsers();
	if (hasAndroidDevices) devicePool.push(DEFAULTS.androidDevices());
	if (hasIOSDevices) devicePool.push(DEFAULTS.iOSDevices());
	if (hasDesktopDevices) devicePool.push(DEFAULTS.desktopDevices());
	// we don't always have campaigns, because of attribution
	if (hasCampaigns && chance.bool({ likelihood: 25 })) defaultProps.campaigns = DEFAULTS.campaigns();
	const deviceChoices = devicePool.flat();
	if (deviceChoices.length) defaultProps.device = deviceChoices;

	// event time: first events happen exactly at earliestTime, others get a TimeSoup time
	draft.time = isFirstEvent
		? dayjs.unix(earliestTime).toISOString()
		: u.TimeSoup(earliestTime, FIXED_NOW, peaks, deviation, mean);

	// anonymous and session ids
	if (anonymousIds.length) draft.device_id = chance.pickone(anonymousIds);
	if (sessionIds.length) draft.session_id = chance.pickone(sessionIds);

	// non-first events sometimes carry a user_id
	if (!isFirstEvent && chance.bool({ likelihood: 42 })) draft.user_id = distinct_id;

	// every event must carry at least one of user_id / device_id
	if (!draft.user_id && !draft.device_id) draft.user_id = distinct_id;

	// custom properties; super props override event props of the same name
	const customProps = { ...chosenEvent.properties, ...superProps };
	for (const key of Object.keys(customProps)) {
		try {
			draft[key] = u.choose(customProps[key]);
		} catch (e) {
			console.error(`error with ${key} in ${chosenEvent.event} event`, e);
			debugger;
		}
	}

	// default properties never overwrite a value that is already set (and truthy)
	const setIfMissing = (key, value) => {
		if (!draft[key]) draft[key] = value;
	};
	for (const key in defaultProps) {
		const pool = defaultProps[key];
		if (!Array.isArray(pool)) continue;

		const choice = u.choose(pool);
		if (typeof choice === "string") {
			setIfMissing(key, choice);
		} else if (Array.isArray(choice)) {
			for (const subChoice of choice) {
				setIfMissing(key, subChoice);
			}
		} else if (typeof choice === "object") {
			// object choices are flattened onto the event, up to two levels deep
			for (const subKey in choice) {
				const value = choice[subKey];
				if (typeof value === "string") {
					setIfMissing(subKey, value);
				} else if (Array.isArray(value)) {
					setIfMissing(subKey, u.choose(value));
				} else if (typeof value === "object") {
					for (const leafKey in value) {
						setIfMissing(leafKey, value[leafKey]);
					}
				}
			}
		}
	}

	// group keys: [key, cardinality, eventNames]; an empty eventNames list means every event
	for (const groupPair of groupKeys) {
		const groupKey = groupPair[0];
		const groupCardinality = groupPair[1];
		const groupEvents = groupPair[2] || [];

		if (!groupEvents.length || groupEvents.includes(draft.event)) {
			draft[groupKey] = u.pick(u.weighNumRange(1, groupCardinality));
		}
	}

	// make $insert_id (hashed while insert_id is still "" and time is unshifted)
	draft.insert_id = md5(JSON.stringify(draft));

	// move time forward
	draft.time = dayjs(draft.time).add(timeShift, "seconds").toISOString();

	return draft;
}
410
+
/**
 * takes a description of a funnel and generates successful and unsuccessful conversions
 * this is called MANY times per user
 *
 * Steps: run the "funnel-pre" hook, choose funnel-level properties, resolve
 * each step of the sequence to an event spec, decide whether the user converts
 * and how many steps they take, order the steps, give each step an offset from
 * the first one, create the events, then run the "funnel-post" hook.
 *
 * @param {Funnel} funnel
 * @param {Person} user
 * @param {number} firstEventTime unix seconds; falls back to the user's `created` time when falsy
 * @param {UserProfile | Object} [profile]
 * @param {Record<string, SCDSchema[]> | Object} [scd]
 * @param {Config} [config]
 * @return {Promise<[EventSchema[], Boolean]>} the generated events and whether the user converted
 * @throws {Error} when funnel or user is missing
 */
async function makeFunnel(funnel, user, firstEventTime, profile, scd, config) {
	if (!funnel) throw new Error("no funnel");
	if (!user) throw new Error("no user");
	if (!profile) profile = {};
	if (!scd) scd = {};

	const chance = u.getChance();
	// `config` is optional in the signature, so never destructure it unguarded
	const { hook = async (a) => a } = config || {};
	await hook(funnel, "funnel-pre", { user, profile, scd, funnel, config });

	// read AFTER the pre-hook so that hook mutations are honoured
	let {
		sequence,
		conversionRate = 50,
		order = 'sequential',
		timeToConvert = 1,
		props,
		requireRepeats = false,
	} = funnel;
	const { distinct_id, created, anonymousIds, sessionIds } = user;
	const { superProps, groupKeys } = config || {};

	// choose the properties for this funnel (once per funnel, shared by all steps)
	const chosenFunnelProps = { ...props, ...superProps };
	for (const key in props) {
		try {
			chosenFunnelProps[key] = u.choose(chosenFunnelProps[key]);
		} catch (e) {
			console.error(`error with ${key} in ${funnel.sequence.join(" > ")} funnel`, e);
		}
	}

	// resolve every step name to an event spec with chosen property values
	const stepSpecs = sequence.map((eventName) => {
		const foundEvent = config?.events?.find((e) => e.event === eventName);
		/** @type {EventConfig} */
		const eventSpec = clone(foundEvent) || { event: eventName, properties: {} };
		for (const key in eventSpec.properties) {
			try {
				eventSpec.properties[key] = u.choose(eventSpec.properties[key]);
			} catch (e) {
				console.error(`error with ${key} in ${eventSpec.event} event`, e);
			}
		}
		delete eventSpec.isFirstEvent;
		delete eventSpec.weight;
		eventSpec.properties = { ...eventSpec.properties, ...chosenFunnelProps };
		return eventSpec;
	});

	// unless repeats are required, a repeated step is kept or skipped on a coin flip,
	// and the conversion rate is nudged up (kept) or down (skipped)
	const funnelPossibleEvents = [];
	for (const step of stepSpecs) {
		const isRepeat = !requireRepeats && funnelPossibleEvents.some((e) => e.event === step.event);
		if (!isRepeat) {
			funnelPossibleEvents.push(step);
			continue;
		}
		if (chance.bool({ likelihood: 50 })) {
			conversionRate = Math.floor(conversionRate * 1.35);
			funnelPossibleEvents.push(step);
		} else {
			conversionRate = Math.floor(conversionRate * .70);
		}
	}

	// chance.bool only accepts likelihoods in [0, 100]; the 1.35x boosts above can
	// push the rate past 100, so clamp right where it is used
	const conversionLikelihood = Math.min(100, Math.max(0, conversionRate));
	const doesUserConvert = chance.bool({ likelihood: conversionLikelihood });
	let numStepsUserWillTake = sequence.length;
	// NOTE(review): for a one-step sequence this calls u.integer(1, 0) — confirm the helper tolerates min > max
	if (!doesUserConvert) numStepsUserWillTake = u.integer(1, sequence.length - 1);
	const msInHour = 60000 * 60;
	const funnelStepsUserWillTake = funnelPossibleEvents.slice(0, numStepsUserWillTake);

	let funnelActualOrder = [];
	switch (order) {
		case "sequential":
			funnelActualOrder = funnelStepsUserWillTake;
			break;
		case "random":
			funnelActualOrder = u.shuffleArray(funnelStepsUserWillTake);
			break;
		case "first-fixed":
			funnelActualOrder = u.shuffleExceptFirst(funnelStepsUserWillTake);
			break;
		case "last-fixed":
			funnelActualOrder = u.shuffleExceptLast(funnelStepsUserWillTake);
			break;
		case "first-and-last-fixed":
			funnelActualOrder = u.fixFirstAndLast(funnelStepsUserWillTake);
			break;
		case "middle-fixed":
			funnelActualOrder = u.shuffleOutside(funnelStepsUserWillTake);
			break;
		case "interrupted": {
			// candidates: events that are neither first events nor part of this funnel
			const potentialSubstitutes = config?.events
				?.filter(e => !e.isFirstEvent)
				?.filter(e => !sequence.includes(e.event)) || [];
			funnelActualOrder = u.interruptArray(funnelStepsUserWillTake, potentialSubstitutes);
			break;
		}
		default:
			funnelActualOrder = funnelStepsUserWillTake;
			break;
	}

	// give every step after the first a strictly increasing offset (ms) from the first step
	const baseIncrement = (timeToConvert * msInHour) / numStepsUserWillTake;
	let lastTimeJump = 0;
	const funnelActualEventsWithOffset = funnelActualOrder.map((event, index) => {
		if (index === 0) {
			event.relativeTimeMs = 0;
			return event;
		}

		// random fluctuation around the base increment
		const fluctuation = u.integer(-baseIncrement / u.integer(3, 5), baseIncrement / u.integer(3, 5));
		const currentTime = lastTimeJump + baseIncrement + fluctuation;

		// never go backwards, even when the fluctuation is large and negative
		const chosenTime = Math.max(currentTime, lastTimeJump + 1);
		lastTimeJump = chosenTime;
		event.relativeTimeMs = chosenTime;
		return event;
	});

	// create the events one after another: every later step is timed relative to
	// the first event, so the first must exist before the rest are adjusted
	const earliestTime = firstEventTime || dayjs(created).unix();
	let funnelStartTime;
	const finalEvents = [];
	for (const [index, step] of funnelActualEventsWithOffset.entries()) {
		const newEvent = await makeEvent(distinct_id, earliestTime, step, anonymousIds, sessionIds, {}, groupKeys);
		if (index === 0) {
			funnelStartTime = dayjs(newEvent.time);
		} else {
			try {
				newEvent.time = dayjs(funnelStartTime).add(step.relativeTimeMs, "milliseconds").toISOString();
			} catch (e) {
				// shouldn't happen; keep the event with the time makeEvent gave it rather than dropping it
				console.error(`error timing ${step.event} in ${funnel.sequence.join(" > ")} funnel`, e);
			}
		}
		delete newEvent.relativeTimeMs;
		finalEvents.push(newEvent);
	}

	await hook(finalEvents, "funnel-post", { user, profile, scd, funnel, config });
	return [finalEvents, doesUserConvert];
}
587
+
/**
 * a function that creates a profile (user or group)
 *
 * Starts from a shallow copy of `defaults`, then sets every key of `props` to
 * a value chosen from that key's definition. A key whose value cannot be
 * chosen is logged and skipped; the rest of the profile is still built.
 *
 * @overload
 * @param {{[key: string]: ValueValid}} props
 * @param {{[key: string]: ValueValid}} [defaults]
 * @returns {Promise<UserProfile>}
 *
 * @overload
 * @param {{[key: string]: ValueValid}} props
 * @param {{[key: string]: ValueValid}} [defaults]
 * @returns {Promise<GroupProfile>}
 */
async function makeProfile(props, defaults) {
	operations++;

	const profile = { ...defaults };

	for (const key in props) {
		try {
			profile[key] = u.choose(props[key]);
		} catch (e) {
			// not expected to happen; report it instead of hiding it, but keep going
			console.error(`error with ${key} in profile`, e);
		}
	}

	return profile;
}
618
+
/**
 * Builds the slowly-changing-dimension history of one property for one user.
 *
 * Produces up to `mutations` entries. Each entry holds a freshly chosen value
 * for `scdKey`, the user's distinct_id, a `startTime` and an `insertTime` that
 * is 1-1000 seconds after it. Generation stops early once the running
 * timestamp is later than the current wall-clock time.
 *
 * @param {ValueValid} prop definition the value is chosen from; `{}` or `[]` yields no entries
 * @param {string} scdKey
 * @param {string} distinct_id
 * @param {number} mutations maximum number of entries
 * @param {string} created timestamp of the first entry
 * @return {Promise<SCDSchema[]>}
 */
async function makeSCD(prop, scdKey, distinct_id, mutations, created) {
	const serialized = JSON.stringify(prop);
	if (serialized === "{}" || serialized === "[]") return [];

	const history = [];
	let cursor = dayjs(created);
	// upper bound (in days) for the jump between two consecutive entries
	const deltaDays = dayjs().diff(cursor, "day");

	for (let round = 0; round < mutations; round += 1) {
		if (cursor.isAfter(dayjs())) break;

		const chosen = await makeProfile({ [scdKey]: prop }, { distinct_id });
		history.push({
			...chosen,
			distinct_id: chosen.distinct_id || distinct_id, // ensure distinct_id is set
			insertTime: cursor.add(u.integer(1, 1000), "seconds").toISOString(),
			startTime: cursor.toISOString(),
		});

		// jump forward by whole days, then back by up to 1000 seconds
		const daysForward = u.integer(0, deltaDays);
		const secondsBack = u.integer(1, 1000);
		cursor = cursor.add(daysForward, "day").subtract(secondsBack, "seconds");
	}

	return history;
}
657
+
658
+
/**
 * creates ad spend events for a given day, one per campaign of every network
 * (the "$organic" campaign is skipped)
 * @param {string} day used as the event time and, formatted YYYY-MM-DD, as `date`
 * @param {Array} [campaigns] networks to generate spend for; defaults to the module-level CAMPAIGNS
 * @return {Promise<EventSchema[]>}
 */
async function makeAdSpend(day, campaigns = CAMPAIGNS) {
	operations++;
	const chance = u.getChance();
	const spendEvents = [];

	for (const network of campaigns) {
		for (const campaign of network.utm_campaign) {
			if (campaign === "$organic") continue;

			// the result is unused (todo: get the # of users created in this day from eventData);
			// the call is kept because it presumably advances the seeded generator — verify before removing
			u.integer(42, 420);

			// randomly generated cost
			const cost = chance.floating({ min: 10, max: 250, fixed: 2 });

			// realistic CPC and CTR
			const avgCPC = chance.floating({ min: 0.33, max: 2.00, fixed: 4 });
			const avgCTR = chance.floating({ min: 0.05, max: 0.25, fixed: 4 });

			// clicks come from cost / CPC; impressions from clicks / CTR
			const clicks = Math.floor(cost / avgCPC);
			const impressions = Math.floor(clicks / avgCTR);
			const views = Math.floor(impressions * avgCTR);

			// tags
			const utm_medium = u.choose(u.pickAWinner(network.utm_medium)());
			const utm_content = u.choose(u.pickAWinner(network.utm_content)());
			const utm_term = u.choose(u.pickAWinner(network.utm_term)());

			// each network + campaign pair is one campaign id
			const source = network.utm_source[0];
			const campaignId = `${source}-${campaign}`;

			spendEvents.push({
				event: "$ad_spend",
				time: day,
				source: 'dm4',
				utm_campaign: campaign,
				campaign_id: campaignId,
				insert_id: md5(campaignId),
				network: source.toUpperCase(),
				distinct_id: source.toUpperCase(),
				utm_source: source,
				utm_medium,
				utm_content,
				utm_term,

				clicks,
				views,
				impressions,
				cost,
				date: dayjs(day).format("YYYY-MM-DD"),
			});
		}
	}

	return spendEvents;
}
721
+
/**
 * takes event data and creates mirror datasets in a future state
 * depending on the mirror strategy
 *
 * Every event in `storage.eventData` yields exactly one entry in
 * `storage.mirrorEventData`: a modified clone when at least one mirror prop
 * applies to the event, otherwise the original event itself.
 *
 * Strategies per mirror prop:
 *  - "create": set the prop on the mirrored event
 *  - "delete": remove the prop from the mirrored event
 *  - "fill":   set the prop on the mirrored event; also set it on the ORIGINAL
 *              event when that event is at least `daysUnfilled` days old
 *  - "update": keep the original value when truthy, otherwise choose one
 *
 * @param {Config} config
 * @param {Storage} storage
 * @return {Promise<void>}
 * @throws {Error} on an unknown strategy
 */
async function makeMirror(config, storage) {
	const { mirrorProps } = config;
	const { eventData, mirrorEventData } = storage;
	const now = dayjs();

	for (const oldEvent of eventData) {
		let newEvent;
		const ageInDays = now.diff(dayjs(oldEvent.time), "day");

		for (const mirrorProp in mirrorProps) {
			const { daysUnfilled = 7, events = "*", strategy = "create", values = [] } = mirrorProps[mirrorProp];
			const applies = events === "*" || events.includes(oldEvent.event);
			if (!applies) continue;

			// clone lazily: only events that are actually touched get copied
			newEvent = newEvent || clone(oldEvent);

			if (strategy === "create") {
				newEvent[mirrorProp] = u.choose(values);
			} else if (strategy === "delete") {
				delete newEvent[mirrorProp];
			} else if (strategy === "fill") {
				if (ageInDays >= daysUnfilled) oldEvent[mirrorProp] = u.choose(values);
				newEvent[mirrorProp] = u.choose(values);
			} else if (strategy === "update") {
				newEvent[mirrorProp] = oldEvent[mirrorProp] || u.choose(values);
			} else {
				throw new Error(`strategy ${strategy} is unknown`);
			}
		}

		await mirrorEventData.hookPush(newEvent || oldEvent);
	}
}
777
+
778
+
779
+ /*
780
+ --------------
781
+ ORCHESTRATORS
782
+ --------------
783
+ */
784
+
785
+
/**
 * Creates every user together with their profile, SCD history and events,
 * pushing the results into the storage tables. The loop lives inside this function.
 *
 * Users are processed one after another (each limited task is awaited before
 * the next one is queued).
 *
 * @param {Config} config
 * @param {Storage} storage
 * @param {number} [concurrency] passed to the p-limit limiter
 * @return {Promise<void>}
 */
async function userLoop(config, storage, concurrency = 1) {
	const chance = u.getChance();
	const limit = pLimit(concurrency);
	const {
		verbose,
		numUsers,
		numEvents,
		isAnonymous,
		hasAvatar,
		hasAnonIds,
		hasSessionIds,
		hasLocation,
		funnels,
		userProps,
		scdProps,
		numDays,
	} = config;
	const { eventData, userProfilesData, scdTableData } = storage;
	const avgEvPerUser = numEvents / numUsers;

	const secondsInDay = 86400;
	// random offset of up to one day, in seconds
	const noise = () => chance.integer({ min: 0, max: secondsInDay });
	const reportProgress = () => {
		if (verbose) u.progress([["users", userCount], ["events", eventCount]]);
	};

	// everything that happens for a single user
	const simulateUser = async () => {
		userCount++;
		reportProgress();

		const userId = chance.guid();
		const user = u.generateUser(userId, { numDays, isAnonymous, hasAvatar, hasAnonIds, hasSessionIds });
		const { distinct_id, created } = user;

		// only a small share of users is "born" inside the dataset; the rest
		// lose their created date and start at the beginning of the window
		const userIsBornInDataset = chance.bool({ likelihood: 5 });
		let numEventsPreformed = 0;
		if (!userIsBornInDataset) delete user.created;
		const adjustedCreated = userIsBornInDataset
			? dayjs(created).subtract(daysShift, 'd')
			: dayjs.unix(global.FIXED_BEGIN);

		if (hasLocation) {
			Object.assign(user, u.choose(DEFAULTS.locationsUsers));
		}

		// Profile creation
		const profile = await makeProfile(userProps, user);
		await userProfilesData.hookPush(profile);

		// SCD creation: one history per configured SCD key
		const userSCD = {};
		for (const [tableIndex, scdKey] of Object.keys(scdProps).entries()) {
			const mutations = chance.integer({ min: 1, max: 10 }); //todo: configurable mutations?
			const changes = await makeSCD(scdProps[scdKey], scdKey, distinct_id, mutations, created);
			userSCD[scdKey] = changes;
			await scdTableData[tableIndex].hookPush(changes);
		}

		// how many events this user gets: normally distributed around the average
		const deviation = avgEvPerUser / u.integer(u.integer(2, 5), u.integer(2, 7));
		let numEventsThisUserWillPreform = Math.floor(
			chance.normal({ mean: avgEvPerUser, dev: deviation }) * 0.714159265359
		);

		// some users are power users, some barely show up (both draws always happen)
		if (chance.bool({ likelihood: 20 })) numEventsThisUserWillPreform *= 5;
		if (chance.bool({ likelihood: 15 })) numEventsThisUserWillPreform *= 0.333;
		numEventsThisUserWillPreform = Math.round(numEventsThisUserWillPreform);

		const firstFunnels = funnels.filter((f) => f.isFirstFunnel).reduce(u.weighFunnels, []);
		const usageFunnels = funnels.filter((f) => !f.isFirstFunnel).reduce(u.weighFunnels, []);

		let userFirstEventTime;
		if (firstFunnels.length && userIsBornInDataset) {
			// users born in the dataset begin with a "first" funnel
			const firstFunnel = chance.pickone(firstFunnels, user);
			const firstTime = adjustedCreated.subtract(noise(), 'seconds').unix();
			const [firstData, userConverted] = await makeFunnel(firstFunnel, user, firstTime, profile, userSCD, config);
			// event times are already shifted forward; undo the shift to get back to fixed time
			userFirstEventTime = dayjs(firstData[0].time).subtract(timeShift, 'seconds').unix();
			numEventsPreformed += firstData.length;
			await eventData.hookPush(firstData);

			// users who fail their first funnel do nothing else
			if (!userConverted) {
				reportProgress();
				return;
			}
		} else {
			userFirstEventTime = adjustedCreated.subtract(noise(), 'seconds').unix();
		}

		// keep generating until the user's event budget is used up
		while (numEventsPreformed < numEventsThisUserWillPreform) {
			if (usageFunnels.length) {
				const currentFunnel = chance.pickone(usageFunnels);
				const [funnelData] = await makeFunnel(currentFunnel, user, userFirstEventTime, profile, userSCD, config);
				numEventsPreformed += funnelData.length;
				await eventData.hookPush(funnelData);
			} else {
				const singleEvent = await makeEvent(distinct_id, userFirstEventTime, u.choose(config.events), user.anonymousIds, user.sessionIds, {}, config.groupKeys, true);
				numEventsPreformed++;
				await eventData.hookPush(singleEvent);
			}
		}

		reportProgress();
	};

	for (let i = 0; i < numUsers; i++) {
		await limit(simulateUser);
	}
}
900
+
901
+
902
/**
 * Imports every generated dataset into Mixpanel and reports what was sent.
 *
 * In memory mode the records held in `storage` are cloned and handed to mixpanel-import.
 * In batch mode (`isBATCH_MODE`) the folder that holds a dataset's part files is listed
 * and the files whose names contain that dataset's marker are imported instead.
 * A dataset that is missing from `storage` is skipped rather than crashing the run.
 * When `config.writeToDisk` is falsy, batch files are deleted after all imports finish.
 *
 * todo: this needs attention
 * @param {Config} config
 * @param {Storage} storage
 * @returns {Promise<Object>} `{ events, users, groups, adSpend? }` with the result of each import that ran
 */
async function sendToMixpanel(config, storage) {
	const { adSpendData, eventData, groupProfilesData, userProfilesData } = storage;
	const { token, region, writeToDisk, format } = config;
	const importResults = { events: {}, users: {}, groups: [] };

	/** @type {import('mixpanel-import').Creds} */
	const creds = { token };
	const mpImportFormat = format === "json" ? "jsonl" : "csv";
	/** @type {import('mixpanel-import').Options} */
	const commonOpts = {
		region,
		fixData: true,
		verbose: false,
		forceStream: true,
		strict: false,
		dryRun: false,
		abridged: false,
		fixJson: true,
		showProgress: true,
		streamFormat: mpImportFormat
	};

	// folder (with trailing separator) that contains a dataset's files, or null when unknown;
	// path.dirname is used so a file name that also appears earlier in the path cannot corrupt the result
	const folderOf = (dataset) => {
		const anchor = dataset?.getWriteDir?.();
		return anchor ? path.join(path.dirname(anchor), path.sep) : null;
	};

	// what to hand to mixpanel-import: a copy of the in-memory records, or the list of batch files
	const sourceFor = async (dataset, marker) => {
		if (!isBATCH_MODE) return dataset ? clone(dataset) : null;
		const folder = folderOf(dataset);
		if (!folder) return null;
		const listing = await ls(folder);
		return listing.filter((f) => f.includes(marker));
	};

	// runs one import; resolves to null when there is nothing to import from
	const importDataset = async ({ dataset, marker, recordType, credentials, announce, noun }) => {
		const source = await sourceFor(dataset, marker);
		if (!source) return null;
		log(`importing ${announce} to mixpanel...\n`);
		const imported = await mp(credentials, source, { recordType, ...commonOpts });
		log(`\tsent ${comma(imported.success)} ${noun}\n`);
		return imported;
	};

	const events = await importDataset({
		dataset: eventData,
		marker: '-EVENTS-',
		recordType: "event",
		credentials: creds,
		announce: "events",
		noun: "events"
	});
	if (events) importResults.events = events;

	const users = await importDataset({
		dataset: userProfilesData,
		marker: '-USERS-',
		recordType: "user",
		credentials: creds,
		announce: "user profiles",
		noun: "user profiles"
	});
	if (users) importResults.users = users;

	const adSpend = await importDataset({
		dataset: adSpendData,
		marker: '-AD-SPEND-',
		recordType: "event",
		credentials: creds,
		announce: "ad spend data",
		noun: "ad spend events"
	});
	if (adSpend) importResults.adSpend = adSpend;

	// each group entity is imported with its own group key
	for (const groupEntity of groupProfilesData || []) {
		const groupKey = groupEntity?.groupKey;
		const groups = await importDataset({
			dataset: groupEntity,
			marker: `-GROUPS-${groupKey}`,
			recordType: "group",
			credentials: { token, groupKey },
			announce: `${groupKey} profiles`,
			noun: `${groupKey} profiles`
		});
		if (groups) importResults.groups.push(groups);
	}

	// if we are in batch mode and the user did not ask for files, delete the batch files
	if (!writeToDisk && isBATCH_MODE) {
		const folder = folderOf(eventData) || folderOf(userProfilesData);
		if (folder) {
			const listing = await ls(folder);
			const markers = ['-EVENTS-', '-USERS-', '-AD-SPEND-', '-GROUPS-'];
			const files = listing.filter((f) => markers.some((marker) => f.includes(marker)));
			for (const file of files) {
				await rm(file);
			}
		}
	}
	return importResults;
}
1010
+
1011
+ /*
1012
+ ----
1013
+ META
1014
+ ----
1015
+ */
1016
+
1017
+
1018
/**
 * Ensures that the config is valid and has all the necessary fields, filling in defaults.
 *
 * The passed `config` object is mutated in place and returned.
 * Side effect: a positive `batchSize` is copied to the module-level `BATCH_SIZE`.
 *
 * `superProps` and `userProps` fall back to their defaults when they are `undefined` OR `null`
 * (destructuring defaults alone only cover `undefined`). An explicitly empty object is kept as-is.
 *
 * @param {Config} config
 * @returns {Config} the same object, completed
 */
function validateDungeonConfig(config) {
	const chance = u.getChance();
	const defaultSuperProps = { luckyNumber: [2, 2, 4, 4, 42, 42, 42, 2, 2, 4, 4, 42, 42, 42, 420] };
	const defaultUserProps = { spiritAnimal: chance.animal.bind(chance) };
	let {
		seed,
		numEvents = 100_000,
		numUsers = 1000,
		numDays = 30,
		epochStart = 0,
		epochEnd = dayjs().unix(),
		events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }],
		superProps = defaultSuperProps,
		funnels = [],
		userProps = defaultUserProps,
		scdProps = {},
		mirrorProps = {},
		groupKeys = [],
		groupProps = {},
		lookupTables = [],
		hasAnonIds = false,
		hasSessionIds = false,
		format = "csv",
		token = null,
		region = "US",
		writeToDisk = false,
		verbose = false,
		makeChart = false,
		soup = {},
		hook = (record) => record,
		hasAdSpend = false,
		hasCampaigns = false,
		hasLocation = false,
		hasAvatar = false,
		isAnonymous = false,
		hasBrowser = false,
		hasAndroidDevices = false,
		hasDesktopDevices = false,
		hasIOSDevices = false,
		alsoInferFunnels = false,
		name = "",
		batchSize = 500_000,
		concurrency = 500
	} = config;

	// ensuring defaults for deep objects: an explicit null would otherwise slip past the defaults above
	if (superProps === null) superProps = defaultSuperProps;
	if (userProps === null) userProps = defaultUserProps;

	// setting up "TIME": derive whichever of numDays / epochStart was not supplied;
	// when both are present they are used as given
	if (epochStart && !numDays) numDays = dayjs.unix(epochEnd).diff(dayjs.unix(epochStart), "day");
	if (!epochStart && numDays) epochStart = dayjs.unix(epochEnd).subtract(numDays, "day").unix();
	if (!epochStart && !numDays) debugger; //never happens

	const simulationName = name || makeName();

	// max batch size
	if (batchSize > 0) BATCH_SIZE = batchSize;

	// FUNNEL INFERENCE: no funnels supplied means they are all inferred from the events
	if (!funnels || !funnels.length) {
		funnels = inferFunnels(events);
	}

	// optionally add inferred funnels on top of whatever is already there
	if (alsoInferFunnels) {
		const inferredFunnels = inferFunnels(events);
		funnels = [...funnels, ...inferredFunnels];
	}

	Object.assign(config, {
		superProps,
		userProps,
		simulationName,
		name: simulationName,
		concurrency,
		funnels,
		batchSize,
		seed,
		numEvents,
		numUsers,
		numDays,
		epochStart,
		epochEnd,
		events,
		scdProps,
		mirrorProps,
		groupKeys,
		groupProps,
		lookupTables,
		hasAnonIds,
		hasSessionIds,
		format,
		token,
		region,
		writeToDisk,
		verbose,
		makeChart,
		soup,
		hook,
		hasAdSpend,
		hasCampaigns,
		hasLocation,
		hasAvatar,
		isAnonymous,
		hasBrowser,
		hasAndroidDevices,
		hasDesktopDevices,
		hasIOSDevices
	});

	// event validation (runs last; funnels above were inferred from the events as supplied)
	config.events = u.validateEventConfig(events);

	return config;
}
1141
+
1142
/**
 * our meta programming function which lets you mutate items as they are pushed into an array
 * it also does batching and writing to disk
 * it kind of is a class - as it produces new objects - but it's not a class
 *
 * The returned value is `arr` itself, decorated with:
 *  - `hookPush(item)`: runs `hook` on the item (or on each element of an array of items) and
 *    appends the result; once the buffer grows past `BATCH_SIZE` it switches the module into
 *    batch mode and writes the buffered records to a new part file
 *  - `flush()`: writes whatever is still buffered
 *  - `getWritePath()` / `getWriteDir()`: where the next file goes / the un-numbered file path
 *  - every extra key passed in `opts`
 *
 * Buffered records are detached from `arr` synchronously before a batch write is awaited, so
 * records pushed by other producers while the write is in flight are kept for the next part
 * instead of being wiped (or written twice) when the write completes.
 *
 * @param {Object} arr
 * @param {hookArrayOptions} opts
 * @returns {Promise<hookArray>}
 */
async function makeHookArray(arr = [], opts = {}) {
	const { hook = a => a, type = "", filepath = "./defaultFile", format = "csv", concurrency = 1, ...rest } = opts;
	const FILE_CONN = pLimit(concurrency); // concurrent file writes
	let batch = 0;
	// prefer a ./data folder when one exists, otherwise write next to the process
	const dataFolder = path.resolve("./data");
	const writeDir = existsSync(dataFolder) ? dataFolder : path.resolve("./");

	function getWritePath() {
		if (isBATCH_MODE) {
			return path.join(writeDir, `${filepath}-part-${batch.toString()}.${format}`);
		}
		return path.join(writeDir, `${filepath}.${format}`);
	}

	// NOTE: despite the name this is the un-numbered FILE path; callers strip the basename themselves
	function getWriteDir() {
		return path.join(writeDir, `${filepath}.${format}`);
	}

	async function transformThenPush(item) {
		if (item === null || item === undefined) return false;
		if (typeof item === 'object' && Object.keys(item).length === 0) return false;

		const incoming = Array.isArray(item) ? item : [item];
		for (const record of incoming) {
			try {
				const enriched = await hook(record, type, rest);
				if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
				else arr.push(enriched);
			} catch (e) {
				// a broken hook must not lose data: keep the untransformed record
				console.error(`\n\nyour hook had an error\n\n`, e);
				arr.push(record);
			}
		}

		if (arr.length > BATCH_SIZE) {
			isBATCH_MODE = true;
			return drainToDisk();
		}
		return false;
	}

	// moves everything currently buffered into its own part file
	async function drainToDisk() {
		batch++;
		const writePath = getWritePath();
		// detach BEFORE awaiting: anything pushed during the write belongs to the next part
		const chunk = arr.splice(0, arr.length);
		return FILE_CONN(() => writeToDisk(chunk, { writePath }));
	}

	async function writeToDisk(data, options) {
		const { writePath } = options;
		if (VERBOSE) log(`\n\n\twriting ${writePath}\n\n`);
		switch (format) {
			case "csv":
				return u.streamCSV(writePath, data);
			case "json":
				return u.streamJSON(writePath, data);
			default:
				throw new Error(`format ${format} is not supported`);
		}
	}

	async function flush() {
		if (arr.length === 0) return;
		if (isBATCH_MODE) {
			await drainToDisk();
			return;
		}
		// memory mode: write a single file and keep the records available to the caller
		batch++;
		const writePath = getWritePath();
		await FILE_CONN(() => writeToDisk(arr, { writePath }));
		if (isBATCH_MODE) arr.length = 0; // batch mode was switched on mid-write; free up memory
	}

	const enrichedArray = arr;

	enrichedArray.hookPush = transformThenPush;
	enrichedArray.flush = flush;
	enrichedArray.getWriteDir = getWriteDir;
	enrichedArray.getWritePath = getWritePath;

	for (const key in rest) {
		enrichedArray[key.toString()] = rest[key];
	}

	return enrichedArray;
}
1249
+
1250
+
1251
/**
 * Builds a set of funnels out of a plain list of events.
 *
 * Produces, in order:
 *  - one single-step, 100%-converting "first" funnel per event flagged `isFirstEvent`
 *  - one sequential funnel containing every other ("usage") event
 *  - one randomly parameterised funnel for each usage event beyond the first
 *
 * @param {EventConfig[]} events
 * @returns {Funnel[]}
 */
function inferFunnels(events) {
	// split event names into onboarding vs. regular usage
	const firstEvents = [];
	const usageEvents = [];
	events.forEach((e) => {
		if (e.isFirstEvent) firstEvents.push(e.event);
	});
	events.forEach((e) => {
		if (!e.isFirstEvent) usageEvents.push(e.event);
	});

	/** @type {Funnel} */
	const funnelTemplate = {
		sequence: [],
		conversionRate: 50,
		order: 'sequential',
		requireRepeats: false,
		props: {},
		timeToConvert: 1,
		isFirstFunnel: false,
		weight: 1
	};

	// every "first" event becomes its own guaranteed funnel
	const createdFunnels = firstEvents.map((event) => ({
		...clone(funnelTemplate),
		sequence: [event],
		isFirstFunnel: true,
		conversionRate: 100
	}));

	// at least one funnel with all usage events
	createdFunnels.push({ ...clone(funnelTemplate), sequence: usageEvents });

	// for the rest, make random funnels (one fewer than the number of usage events)
	let remaining = usageEvents.length - 1;
	while (remaining > 0) {
		/** @type {Funnel} */
		const funnel = {
			...clone(funnelTemplate),
			conversionRate: u.integer(25, 75),
			timeToConvert: u.integer(1, 10),
			weight: u.integer(1, 10),
			sequence: u.shuffleArray(usageEvents).slice(0, u.integer(2, usageEvents.length)),
			order: 'random'
		};
		createdFunnels.push(funnel);
		remaining -= 1;
	}

	return createdFunnels;
}
1296
+
1297
+
1298
+ /*
1299
+ ----
1300
+ CLI
1301
+ ----
1302
+ */
1303
+
1304
// Entry point: when this file is executed directly it runs as a CLI, otherwise it exports
// `main` (with its generators / orchestrators / meta helpers attached) for programmatic use.
if (require.main === module) {
	isCLI = true;
	const args = /** @type {Config} */ (getCliParams());
	let { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false, hasSessionIds, hasAnonIds } = args;
	const suppliedConfig = args._[0];

	// if the user specifies a separate config file, use it; otherwise fall back to a bundled schema
	let config = null;
	if (suppliedConfig) {
		console.log(`using ${suppliedConfig} for data\n`);
		config = require(path.resolve(suppliedConfig));
	}
	else if (complex) {
		console.log(`... using default COMPLEX configuration [everything] ...\n`);
		console.log(`... for more simple data, don't use the --complex flag ...\n`);
		console.log(`... or specify your own js config file (see docs or --help) ...\n`);
		config = require(path.resolve(__dirname, "./schemas/complex.js"));
	}
	else {
		console.log(`... using default SIMPLE configuration [events + users] ...\n`);
		console.log(`... for more complex data, use the --complex flag ...\n`);
		config = require(path.resolve(__dirname, "./schemas/simple.js"));
	}

	// override config with cli params
	if (token) config.token = token;
	if (seed) config.seed = seed;
	// a "csv" format from the cli does not override a config file that asks for json
	if (format === "csv" && config.format === "json") format = "json";
	if (format) config.format = format;
	if (numDays) config.numDays = numDays;
	if (numUsers) config.numUsers = numUsers;
	if (numEvents) config.numEvents = numEvents;
	if (region) config.region = region;
	if (writeToDisk) config.writeToDisk = writeToDisk;
	if (writeToDisk === 'false') config.writeToDisk = false;
	if (hasSessionIds) config.hasSessionIds = hasSessionIds;
	if (hasAnonIds) config.hasAnonIds = hasAnonIds;
	config.verbose = true;

	main(config)
		.then((data) => {
			//todo: rethink summary
			log(`-----------------SUMMARY-----------------`);
			const d = { success: 0, bytes: 0 };
			const darr = [d];
			const { events = d, groups = darr, users = d } = data?.importResults || {};

			// the file list may be missing or nested; flatten it once and reuse it
			const files = data?.files;
			const fileList = Array.isArray(files) ? files.flat() : [];
			const firstFile = fileList.find((f) => typeof f === 'string');
			const folder = firstFile ? path.join(path.dirname(firstFile), path.sep) : undefined;

			// an import that did not run has no counters; treat those as zero so one gap
			// does not turn the whole total into NaN
			const groupBytes = groups.reduce((acc, group) => acc + (group.bytes || 0), 0);
			const groupSuccess = groups.reduce((acc, group) => acc + (group.success || 0), 0);
			const bytes = (events.bytes || 0) + groupBytes + (users.bytes || 0);
			const stats = {
				events: comma(events.success || 0),
				users: comma(users.success || 0),
				groups: comma(groupSuccess || 0),
				bytes: bytesHuman(bytes || 0),
			};
			if (bytes > 0) console.table(stats);
			log(`\nfiles written to ${folder || "no where; we didn't write anything"} ...`);
			if (fileList.length) log(" " + fileList.join("\n "));
			log(`\n----------------SUMMARY-----------------\n\n\n`);
		})
		.catch((e) => {
			log(`------------------ERROR------------------`);
			console.error(e);
			log(`------------------ERROR------------------`);
			// let shells and CI see that the run failed
			process.exitCode = 1;
			debugger;
		})
		.finally(() => {
			log("enjoy your data! :)");
			u.openFinder(path.resolve("./data"));
		});
} else {
	main.generators = { makeEvent, makeFunnel, makeProfile, makeSCD, makeAdSpend, makeMirror };
	main.orchestrators = { userLoop, validateDungeonConfig, sendToMixpanel };
	main.meta = { inferFunnels, hookArray: makeHookArray };
	module.exports = main;
}
1388
+
1389
+
1390
+ /*
1391
+ ----
1392
+ HELPERS
1393
+ ----
1394
+ */
1395
+
1396
/**
 * console.log wrapper that only prints when the module-level `VERBOSE` flag is on.
 * Every occurrence of the current working directory inside string arguments is shortened
 * to "." so printed paths read as relative ones; non-string arguments are passed through.
 * @param {...any} args
 */
function log(...args) {
	if (!VERBOSE) return;
	const cwd = process.cwd();
	// literal split/join instead of a RegExp: directory names may contain characters such as
	// "(", "+", "[" or "\" that would be misread as (or break) a regular expression
	const printable = args.map((arg) => (typeof arg === 'string' ? arg.split(cwd).join(".") : arg));
	console.log(...printable);
}
1407
+
1408
/**
 * Forwards a usage event to the module-level `metrics` tracker.
 * Nothing is sent while `NODE_ENV` is "test".
 * @param {string} name
 * @param {Object} props
 * @param {...any} rest extra arguments handed to `metrics` untouched
 */
function track(name, props, ...rest) {
	const isTestRun = process.env.NODE_ENV === 'test';
	if (!isTestRun) {
		metrics(name, props, ...rest);
	}
}
1412
+
1413
+
1414
+ /** @typedef {import('./types.js').Config} Config */
1415
+ /** @typedef {import('./types.js').AllData} AllData */
1416
+ /** @typedef {import('./types.js').EventConfig} EventConfig */
1417
+ /** @typedef {import('./types.js').Funnel} Funnel */
1418
+ /** @typedef {import('./types.js').Person} Person */
1419
+ /** @typedef {import('./types.js').SCDSchema} SCDSchema */
1420
+ /** @typedef {import('./types.js').UserProfile} UserProfile */
1421
+ /** @typedef {import('./types.js').EventSchema} EventSchema */
1422
+ /** @typedef {import('./types.js').Storage} Storage */
1423
+ /** @typedef {import('./types.js').Result} Result */
1424
+ /** @typedef {import('./types.js').ValueValid} ValueValid */
1425
+ /** @typedef {import('./types.js').HookedArray} hookArray */
1426
+ /** @typedef {import('./types.js').hookArrayOptions} hookArrayOptions */
1427
+ /** @typedef {import('./types.js').GroupProfileSchema} GroupProfile */