make-mp-data 1.4.4 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js ADDED
@@ -0,0 +1,1410 @@
1
+ #! /usr/bin/env node
2
+
3
+ /*
4
+ make fake mixpanel data easily!
5
+ by AK
6
+ ak@mixpanel.com
7
+ */
8
+
9
+ //todo: churn implementation
10
+ //todo: regular interval events (like 'card charged')
11
+ //todo: SCDs send to mixpanel
12
+ //todo: decent 'new dungeon' workflow
13
+
14
+
15
+ //TIME
16
+ const dayjs = require("dayjs");
17
+ const utc = require("dayjs/plugin/utc");
18
+ dayjs.extend(utc);
19
+ const NOW = dayjs('2024-02-02').unix(); // fixed reference instant, expressed in Unix seconds
20
+ global.NOW = NOW; // the same value is also published on `global`
21
+ // ^ this creates a FIXED POINT in time; we will shift it later
22
+ const actualNow = dayjs(); // wall-clock time at module load
23
+ const fixedNow = dayjs.unix(global.NOW);
24
+ const timeShift = actualNow.diff(fixedNow, "second"); // seconds from the fixed point to the real "now"; makeEvent adds this to every event time
25
+
26
+ // UTILS
27
+ const { existsSync } = require("fs");
28
+ const pLimit = require('p-limit');
29
+ const os = require("os");
30
+ const path = require("path");
31
+ const { comma, bytesHuman, makeName, md5, clone, tracker, uid, timer, ls, rm } = require("ak-tools");
32
+ const jobTimer = timer('job');
33
+ const { generateLineChart } = require('./src/chart.js');
34
+ const { version } = require('./package.json');
35
+ const mp = require("mixpanel-import");
36
+ const u = require("./src/utils.js");
37
+ const getCliParams = require("./src/cli.js");
38
+ const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41", os.userInfo().username);
39
+
40
+ // DEFAULTS
41
+ const { campaigns, devices, locations } = require('./src/defaults.js');
42
+ let CAMPAIGNS;
43
+ let DEFAULTS;
44
+ /** @type {Storage} */
45
+ let STORAGE;
46
+ /** @type {Config} */
47
+ let CONFIG;
48
+ require('dotenv').config();
49
+
50
+
51
+ // RUN STATE
52
+ let VERBOSE = false; // overwritten from config.verbose inside main()
53
+ let isCLI = false;
54
+ // if we are running in batch mode, we MUST write to disk before we can send to mixpanel
55
+ let isBATCH_MODE = false;
56
+ let BATCH_SIZE = 500_000;
57
+
58
+ //todo: these should be moved into the hookedArrays
59
+ let operations = 0; // incremented by makeEvent, makeProfile and makeAdSpend
60
+ let eventCount = 0; // incremented once per makeEvent call
61
+ let userCount = 0; // incremented once per simulated user in userLoop
62
+
63
+
64
+
/**
 * generates fake mixpanel data
 *
 * Seeds the shared random number generator, validates the config, builds the
 * hook-array storage tables, then simulates users + events, ad spend, group
 * profiles, lookup tables and mirror data. When neither `writeToDisk` nor
 * `token` is set the in-memory storage is returned directly; otherwise tables
 * are flushed to disk and/or imported into mixpanel first.
 *
 * @param {Config} config
 * @returns {Promise<Result>} the storage tables plus timing info (and import results when a token is set)
 */
async function main(config) {
	jobTimer.start();
	const seedWord = process.env.SEED || config.seed || "hello friend!";
	config.seed = seedWord;
	// seed the random number generator; everything else gets it with u.getChance()
	// ^ this is critical; same seed = same data;
	// ^ seed can be passed in as an env var or in the config
	u.initChance(seedWord);
	validateDungeonConfig(config);

	//GLOBALS
	CONFIG = config;
	VERBOSE = config.verbose;
	CAMPAIGNS = campaigns;
	DEFAULTS = {
		locationsUsers: u.pickAWinner(clone(locations).map(l => { delete l.country; return l; }), 0),
		locationsEvents: u.pickAWinner(clone(locations).map(l => { delete l.country_code; return l; }), 0),
		iOSDevices: u.pickAWinner(devices.iosDevices, 0),
		androidDevices: u.pickAWinner(devices.androidDevices, 0),
		desktopDevices: u.pickAWinner(devices.desktopDevices, 0),
		browsers: u.pickAWinner(devices.browsers, 0),
		campaigns: u.pickAWinner(campaigns, 0),
	};

	//TRACKING
	const runId = uid(42);
	// every key named here is deliberately EXCLUDED from trackingParams, even if it is not used below
	const { events, superProps, userProps, scdProps, groupKeys, groupProps, lookupTables, soup, hook, mirrorProps, ...trackingParams } = config;
	trackingParams.runId = runId;
	trackingParams.version = version;

	//STORAGE
	const { simulationName, format } = config;
	const eventData = await makeHookArray([], { hook, type: "event", config, format, filepath: `${simulationName}-EVENTS` });
	const userProfilesData = await makeHookArray([], { hook, type: "user", config, format, filepath: `${simulationName}-USERS` });
	const adSpendData = await makeHookArray([], { hook, type: "ad-spend", config, format, filepath: `${simulationName}-AD-SPEND` });

	// SCDs, Groups, + Lookups may have multiple tables
	const scdTableData = await Promise.all(Object.keys(scdProps).map((key) =>
		makeHookArray([], { hook, type: "scd", config, format, scdKey: key, filepath: `${simulationName}-SCD-${key}` })
	));

	const groupProfilesData = await Promise.all(Object.keys(groupKeys).map((_key, index) => {
		// each group pair is [groupKey, cardinality, ...]; only the key is needed for the table name
		const groupKey = groupKeys[index]?.slice()?.shift();
		return makeHookArray([], { hook, type: "group", config, format, groupKey, filepath: `${simulationName}-GROUPS-${groupKey}` });
	}));

	const lookupTableData = await Promise.all(Object.keys(lookupTables).map((_key, index) => {
		const lookupKey = lookupTables[index].key;
		return makeHookArray([], { hook, type: "lookup", config, format, lookupKey: lookupKey, filepath: `${simulationName}-LOOKUP-${lookupKey}` });
	}));

	const mirrorEventData = await makeHookArray([], { hook, type: "mirror", config, format, filepath: `${simulationName}-MIRROR` });

	STORAGE = { eventData, userProfilesData, scdTableData, groupProfilesData, lookupTableData, mirrorEventData, adSpendData };

	track('start simulation', trackingParams);
	log(`------------------SETUP------------------`);
	log(`\nyour data simulation will heretofore be known as: \n\n\t${config.simulationName.toUpperCase()}...\n`);
	log(`and your configuration is:\n\n`, JSON.stringify(trackingParams, null, 2));
	log(`------------------SETUP------------------`, "\n");

	//USERS
	log(`---------------SIMULATION----------------`, "\n\n");
	const { concurrency = 1 } = config;
	await userLoop(config, STORAGE, concurrency);
	const { hasAdSpend, epochStart, epochEnd } = config;

	// AD SPEND
	if (hasAdSpend) {
		const days = u.datesBetween(epochStart, epochEnd);
		for (const day of days) {
			const dailySpendData = await makeAdSpend(day);
			for (const spendEvent of dailySpendData) {
				await adSpendData.hookPush(spendEvent);
			}
		}
	}

	log("\n");

	//GROUP PROFILES
	for (const [index, groupPair] of groupKeys.entries()) {
		const [groupKey, groupCardinality] = groupPair;
		// group ids are 1-based: 1..groupCardinality inclusive
		for (let i = 1; i < groupCardinality + 1; i++) {
			if (VERBOSE) u.progress([["groups", i]]);
			const props = await makeProfile(groupProps[groupKey]);
			const group = {
				[groupKey]: i,
				...props,
			};
			group["distinct_id"] = i.toString();
			await groupProfilesData[index].hookPush(group);
		}
	}
	log("\n");

	//LOOKUP TABLES
	for (const [index, lookupTable] of lookupTables.entries()) {
		const { key, entries, attributes } = lookupTable;
		// lookup ids are 1-based: 1..entries inclusive
		for (let i = 1; i < entries + 1; i++) {
			if (VERBOSE) u.progress([["lookups", i]]);
			const props = await makeProfile(attributes);
			const item = {
				[key]: i,
				...props,
			};
			await lookupTableData[index].hookPush(item);
		}
	}
	log("\n");

	// MIRROR
	if (Object.keys(mirrorProps).length) await makeMirror(config, STORAGE);

	log("\n");
	log(`---------------SIMULATION----------------`, "\n");

	// draw charts
	const { makeChart } = config;
	if (makeChart) {
		const bornEvents = config.events?.filter((e) => e?.isFirstEvent)?.map(e => e.event) || [];
		const bornFunnels = config.funnels?.filter((f) => f.isFirstFunnel)?.map(f => f.sequence[0]) || [];
		const bornBehaviors = [...bornEvents, ...bornFunnels];
		await generateLineChart(eventData, bornBehaviors, makeChart);
	}

	const { writeToDisk, token } = config;
	if (!writeToDisk && !token) {
		// nothing to write or send: hand back the in-memory tables with timing attached
		jobTimer.stop(false);
		const { start, end, delta, human } = jobTimer.report(false);
		// this is awkward, but i couldn't figure out any other way to assert a type in jsdoc
		const i = /** @type {any} */ (STORAGE);
		i.time = { start, end, delta, human };
		const j = /** @type {Result} */ (i);
		return j;
	}

	log(`-----------------WRITES------------------`, `\n\n`);

	// write to disk and/or send to mixpanel
	// NOTE: `files` is never assigned in this function; it is returned as `undefined`
	let files;
	if (writeToDisk) {
		for (const table of Object.values(STORAGE)) {
			if (table.length && typeof table.flush === "function") {
				// a single, non-empty hook array
				await table.flush();
			} else if (Array.isArray(table) && typeof table[0]?.flush === "function") {
				// a list of hook arrays (SCDs, groups, lookups)
				for (const subTable of table) {
					await subTable.flush();
				}
			}
		}
	}
	let importResults;
	if (token) importResults = await sendToMixpanel(config, STORAGE);

	log(`\n-----------------WRITES------------------`, "\n");
	track('end simulation', trackingParams);
	jobTimer.stop(false);
	const { start, end, delta, human } = jobTimer.report(false);

	return {
		...STORAGE,
		importResults,
		files,
		time: { start, end, delta, human },
	};
}
253
+
254
+
255
+
256
+
257
+ /*
258
+ ------
259
+ MODELS
260
+ ------
261
+ */
262
+
/**
 * creates a mixpanel event with a flat shape
 *
 * Custom event properties and super properties are chosen first; default
 * properties (location, browser, device, campaign) only fill keys that are
 * still empty. The event time is computed against the fixed NOW and then
 * shifted to the present by `timeShift`.
 *
 * @param {string} distinct_id
 * @param {number} earliestTime unix seconds; clamped to two days before NOW when it is after NOW
 * @param {EventConfig} chosenEvent
 * @param {string[]} [anonymousIds]
 * @param {string[]} [sessionIds]
 * @param {Object} [superProps]
 * @param {Object} [groupKeys]
 * @param {Boolean} [isFirstEvent] when true the event time is exactly `earliestTime`
 * @return {Promise<EventSchema>}
 * @throws {Error} when distinct_id, earliestTime or chosenEvent is missing
 */
async function makeEvent(distinct_id, earliestTime, chosenEvent, anonymousIds, sessionIds, superProps, groupKeys, isFirstEvent) {
	operations++;
	eventCount++;
	if (!distinct_id) throw new Error("no distinct_id");
	if (!anonymousIds) anonymousIds = [];
	if (!sessionIds) sessionIds = [];
	if (!earliestTime) throw new Error("no earliestTime");
	if (!chosenEvent) throw new Error("no chosenEvent");
	if (!superProps) superProps = {};
	if (!groupKeys) groupKeys = [];
	if (!isFirstEvent) isFirstEvent = false;
	const chance = u.getChance();
	const { mean = 0, deviation = 2, peaks = 5 } = CONFIG?.soup || {};
	const {
		hasAndroidDevices = false,
		hasBrowser = false,
		hasCampaigns = false,
		hasDesktopDevices = false,
		hasIOSDevices = false,
		hasLocation = false
	} = CONFIG || {};

	//event model
	const eventTemplate = {
		event: chosenEvent.event,
		source: "dm4",
		time: "",
		insert_id: "",
	};

	// defaults never overwrite a truthy value already present on the event
	const setIfMissing = (key, value) => {
		if (!eventTemplate[key]) eventTemplate[key] = value;
	};

	const defaultProps = {};
	const devicePool = [];
	if (hasLocation) defaultProps.location = DEFAULTS.locationsEvents();
	if (hasBrowser) defaultProps.browser = DEFAULTS.browsers();
	if (hasAndroidDevices) devicePool.push(DEFAULTS.androidDevices());
	if (hasIOSDevices) devicePool.push(DEFAULTS.iOSDevices());
	if (hasDesktopDevices) devicePool.push(DEFAULTS.desktopDevices());
	// we don't always have campaigns, because of attribution
	if (hasCampaigns && chance.bool({ likelihood: 25 })) defaultProps.campaigns = DEFAULTS.campaigns();
	const deviceChoices = devicePool.flat();
	if (deviceChoices.length) defaultProps.device = deviceChoices;

	//event time
	if (earliestTime > NOW) {
		earliestTime = dayjs.unix(NOW).subtract(2, 'd').unix();
	}

	if (isFirstEvent) eventTemplate.time = dayjs.unix(earliestTime).toISOString();
	else eventTemplate.time = u.TimeSoup(earliestTime, NOW, peaks, deviation, mean);

	// anonymous and session ids
	if (anonymousIds.length) eventTemplate.device_id = chance.pickone(anonymousIds);
	if (sessionIds.length) eventTemplate.session_id = chance.pickone(sessionIds);

	//sometimes have a user_id
	if (!isFirstEvent && chance.bool({ likelihood: 42 })) eventTemplate.user_id = distinct_id;

	// ensure that there is a user_id or device_id
	if (!eventTemplate.user_id && !eventTemplate.device_id) eventTemplate.user_id = distinct_id;

	const props = { ...chosenEvent.properties, ...superProps };

	//iterate through custom properties
	for (const key in props) {
		try {
			eventTemplate[key] = u.choose(props[key]);
		} catch (e) {
			// best effort: a bad property definition is reported and skipped
			console.error(`error with ${key} in ${chosenEvent.event} event`, e);
		}
	}

	//iterate through default properties
	for (const key in defaultProps) {
		if (!Array.isArray(defaultProps[key])) continue;
		const choice = u.choose(defaultProps[key]);

		if (typeof choice === "string") {
			setIfMissing(key, choice);
		}
		else if (Array.isArray(choice)) {
			// first truthy entry wins
			for (const subChoice of choice) setIfMissing(key, subChoice);
		}
		else if (typeof choice === "object") {
			// an object choice is flattened onto the event, one level of nesting deep
			for (const subKey in choice) {
				const value = choice[subKey];
				if (typeof value === "string") {
					setIfMissing(subKey, value);
				}
				else if (Array.isArray(value)) {
					// the draw happens even when the key is already set, to keep the random stream stable
					setIfMissing(subKey, u.choose(value));
				}
				else if (typeof value === "object") {
					for (const subSubKey in value) setIfMissing(subSubKey, value[subSubKey]);
				}
			}
		}
	}

	//iterate through groups
	for (const groupPair of groupKeys) {
		const groupKey = groupPair[0];
		const groupCardinality = groupPair[1];
		const groupEvents = groupPair[2] || [];

		// empty array for group events means all events
		if (!groupEvents.length || groupEvents.includes(eventTemplate.event)) {
			eventTemplate[groupKey] = u.pick(u.weighNumRange(1, groupCardinality));
		}
	}

	//make $insert_id (hashed BEFORE the time shift, so it does not depend on the wall clock)
	eventTemplate.insert_id = md5(JSON.stringify(eventTemplate));

	//time shift to present
	const newTime = dayjs(eventTemplate.time).add(timeShift, "second");
	eventTemplate.time = newTime.toISOString();

	return eventTemplate;
}
407
+
/**
 * takes a description of a funnel and generates successful and unsuccessful conversions
 * this is called MANY times per user
 *
 * Steps are resolved against `config.events`, optionally thinned when a step
 * repeats, ordered according to `funnel.order`, and given increasing time
 * offsets from the first step. The `hook` is called before ("funnel-pre")
 * and after ("funnel-post") generation; its return value is not used here.
 *
 * @param {Funnel} funnel
 * @param {Person} user
 * @param {number} firstEventTime unix seconds; falls back to the user's `created` time when falsy
 * @param {UserProfile | Object} [profile]
 * @param {Record<string, SCDSchema[]> | Object} [scd]
 * @param {Config} [config]
 * @return {Promise<[EventSchema[], Boolean]>} the generated events and whether the user converted
 * @throws {Error} when funnel or user is missing
 */
async function makeFunnel(funnel, user, firstEventTime, profile, scd, config) {
	if (!funnel) throw new Error("no funnel");
	if (!user) throw new Error("no user");
	if (!profile) profile = {};
	if (!scd) scd = {};

	const chance = u.getChance();
	// config is documented as optional, so never destructure it unguarded
	const { hook = async (a) => a, superProps, groupKeys } = config ?? {};
	await hook(funnel, "funnel-pre", { user, profile, scd, funnel, config });
	let {
		sequence,
		conversionRate = 50,
		order = 'sequential',
		timeToConvert = 1,
		props,
		requireRepeats = false,
	} = funnel;
	const { distinct_id, created, anonymousIds, sessionIds } = user;

	//choose the properties for this funnel
	const chosenFunnelProps = { ...props, ...superProps };
	for (const key in props) {
		try {
			chosenFunnelProps[key] = u.choose(chosenFunnelProps[key]);
		} catch (e) {
			console.error(`error with ${key} in ${funnel.sequence.join(" > ")} funnel`, e);
		}
	}

	// resolve every step name to a concrete event spec with chosen property values
	const stepSpecs = sequence.map((eventName) => {
		const foundEvent = config?.events?.find((e) => e.event === eventName);
		/** @type {EventConfig} */
		const eventSpec = clone(foundEvent) || { event: eventName, properties: {} };
		for (const key in eventSpec.properties) {
			try {
				eventSpec.properties[key] = u.choose(eventSpec.properties[key]);
			} catch (e) {
				console.error(`error with ${key} in ${eventSpec.event} event`, e);
			}
		}
		delete eventSpec.isFirstEvent;
		delete eventSpec.weight;
		eventSpec.properties = { ...eventSpec.properties, ...chosenFunnelProps };
		return eventSpec;
	});

	// unless repeats are required, a repeated step is kept or skipped on a coin flip,
	// and the conversion rate is nudged up (kept) or down (skipped)
	const funnelPossibleEvents = [];
	for (const step of stepSpecs) {
		const isRepeat = !requireRepeats && funnelPossibleEvents.some(e => e.event === step.event);
		if (!isRepeat) {
			funnelPossibleEvents.push(step);
		}
		else if (chance.bool({ likelihood: 50 })) {
			conversionRate = Math.floor(conversionRate * 1.35); //increase conversion rate
			funnelPossibleEvents.push(step);
		}
		else {
			//A SKIPPED STEP!
			conversionRate = Math.floor(conversionRate * .70); //reduce conversion rate
		}
	}

	// chance.bool() throws a RangeError outside 0-100, and the 1.35x bumps above can exceed 100
	const likelihood = Math.min(100, Math.max(0, conversionRate));
	const doesUserConvert = chance.bool({ likelihood });
	let numStepsUserWillTake = sequence.length;
	if (!doesUserConvert) numStepsUserWillTake = u.integer(1, sequence.length - 1);
	const msInHour = 60000 * 60;
	const funnelStepsUserWillTake = funnelPossibleEvents.slice(0, numStepsUserWillTake);

	let funnelActualOrder = [];

	switch (order) {
		case "sequential":
			funnelActualOrder = funnelStepsUserWillTake;
			break;
		case "random":
			funnelActualOrder = u.shuffleArray(funnelStepsUserWillTake);
			break;
		case "first-fixed":
			funnelActualOrder = u.shuffleExceptFirst(funnelStepsUserWillTake);
			break;
		case "last-fixed":
			funnelActualOrder = u.shuffleExceptLast(funnelStepsUserWillTake);
			break;
		case "first-and-last-fixed":
			funnelActualOrder = u.fixFirstAndLast(funnelStepsUserWillTake);
			break;
		case "middle-fixed":
			funnelActualOrder = u.shuffleOutside(funnelStepsUserWillTake);
			break;
		case "interrupted": {
			// substitutes are non-first events that are not already part of this funnel
			const potentialSubstitutes = config?.events
				?.filter(e => !e.isFirstEvent)
				?.filter(e => !sequence.includes(e.event)) || [];
			funnelActualOrder = u.interruptArray(funnelStepsUserWillTake, potentialSubstitutes);
			break;
		}
		default:
			funnelActualOrder = funnelStepsUserWillTake;
			break;
	}

	// assign each step a strictly increasing offset (ms) from the first step
	let lastTimeJump = 0;
	const funnelActualEventsWithOffset = funnelActualOrder
		.map((event, index) => {
			if (index === 0) {
				event.relativeTimeMs = 0;
				return event;
			}

			// Calculate base increment for each step
			const baseIncrement = (timeToConvert * msInHour) / numStepsUserWillTake;

			// Introduce a random fluctuation factor
			const fluctuation = u.integer(-baseIncrement / u.integer(3, 5), baseIncrement / u.integer(3, 5));

			// Ensure the time increments are increasing and add randomness
			const previousTime = lastTimeJump;
			const currentTime = previousTime + baseIncrement + fluctuation;

			// Assign the calculated time to the event
			const chosenTime = Math.max(currentTime, previousTime + 1); // Ensure non-decreasing time
			lastTimeJump = chosenTime;
			event.relativeTimeMs = chosenTime;
			return event;
		});

	const earliestTime = firstEventTime || dayjs(created).unix();
	let funnelStartTime;
	const finalEvents = [];
	// built sequentially: every later step's time is derived from the first step's time
	for (const [index, step] of funnelActualEventsWithOffset.entries()) {
		const newEvent = await makeEvent(distinct_id, earliestTime, step, anonymousIds, sessionIds, {}, groupKeys);
		if (index === 0) {
			funnelStartTime = dayjs(newEvent.time);
		} else {
			newEvent.time = dayjs(funnelStartTime).add(step.relativeTimeMs, "milliseconds").toISOString();
		}
		delete newEvent.relativeTimeMs;
		finalEvents.push(newEvent);
	}

	await hook(finalEvents, "funnel-post", { user, profile, scd, funnel, config });
	return [finalEvents, doesUserConvert];
}
584
+
/**
 * a function that creates a profile (user or group)
 *
 * Starts from a shallow copy of `defaults` and then sets one chosen value
 * per key in `props`, so a key present in both ends up with the chosen value.
 *
 * @overload
 * @param {{[key: string]: ValueValid}} props
 * @param {{[key: string]: ValueValid}} [defaults]
 * @returns {Promise<UserProfile>}
 *
 * @overload
 * @param {{[key: string]: ValueValid}} props
 * @param {{[key: string]: ValueValid}} [defaults]
 * @returns {Promise<GroupProfile>}
 */
async function makeProfile(props, defaults) {
	operations++;

	const profile = { ...defaults };

	for (const key in props) {
		try {
			profile[key] = u.choose(props[key]);
		} catch (e) {
			// best effort: report the bad property and keep building the profile
			console.error(`error with ${key} in profile`, e);
		}
	}

	return profile;
}
615
+
/**
 * creates up to `mutations` slowly-changing-dimension entries for one property
 *
 * Returns an empty list when `prop` serializes to `{}` or `[]`. Generation
 * stops early once the running start time passes the current wall-clock time.
 *
 * @param {ValueValid} prop
 * @param {string} scdKey
 * @param {string} distinct_id
 * @param {number} mutations
 * @param {string} created
 * @return {Promise<SCDSchema[]>}
 */
async function makeSCD(prop, scdKey, distinct_id, mutations, created) {
	const serializedProp = JSON.stringify(prop);
	if (serializedProp === "{}" || serializedProp === "[]") return [];

	const scdEntries = [];
	const now = dayjs();
	let lastInserted = dayjs(created);
	const deltaDays = now.diff(lastInserted, "day");

	for (let i = 0; i < mutations; i++) {
		if (lastInserted.isAfter(now)) break;
		const scd = await makeProfile({ [scdKey]: prop }, { distinct_id });

		scdEntries.push({
			...scd, // spread existing properties
			distinct_id: scd.distinct_id || distinct_id, // ensure distinct_id is set
			insertTime: lastInserted.add(u.integer(1, 1000), "seconds").toISOString(),
			startTime: lastInserted.toISOString()
		});

		// jump forward by 0..deltaDays days, minus a small random number of seconds
		lastInserted = lastInserted
			.add(u.integer(0, deltaDays), "day")
			.subtract(u.integer(1, 1000), "seconds");
	}

	return scdEntries;
}
654
+
655
+
/**
 * builds one "$ad_spend" event per (network, campaign) pair for a single day
 * campaigns named "$organic" are skipped
 * @param {string} day
 * @param {Array} [campaigns] ad networks to iterate; defaults to the module-level CAMPAIGNS
 * @return {Promise<EventSchema[]>}
 */
async function makeAdSpend(day, campaigns = CAMPAIGNS) {
	operations++;
	const chance = u.getChance();
	const spendEvents = [];

	for (const network of campaigns) {
		const [source] = network.utm_source;

		for (const campaignName of network.utm_campaign) {
			if (campaignName === "$organic") continue;

			// value is unused, but the draw is kept so the seeded random stream is unchanged
			//todo: get the # of users created in this day from eventData
			u.integer(42, 420);

			// spend for the day
			const cost = chance.floating({ min: 10, max: 250, fixed: 2 });

			// cost-per-click and click-through rate
			const avgCPC = chance.floating({ min: 0.33, max: 2.00, fixed: 4 });
			const avgCTR = chance.floating({ min: 0.05, max: 0.25, fixed: 4 });

			// everything else is derived from cost, CPC and CTR
			const clicks = Math.floor(cost / avgCPC);
			const impressions = Math.floor(clicks / avgCTR);
			const views = Math.floor(impressions * avgCTR);

			//tags
			const utm_medium = u.choose(u.pickAWinner(network.utm_medium)());
			const utm_content = u.choose(u.pickAWinner(network.utm_content)());
			const utm_term = u.choose(u.pickAWinner(network.utm_term)());

			//each network + campaign pair is one campaign id
			const campaignId = source + '-' + campaignName;
			const networkLabel = source.toUpperCase();

			spendEvents.push({
				event: "$ad_spend",
				time: day,
				source: 'dm4',
				utm_campaign: campaignName,
				campaign_id: campaignId,
				insert_id: md5(campaignId),
				network: networkLabel,
				distinct_id: networkLabel,
				utm_source: source,
				utm_medium,
				utm_content,
				utm_term,

				clicks,
				views,
				impressions,
				cost,
				date: dayjs(day).format("YYYY-MM-DD"),
			});
		}
	}

	return spendEvents;
}
718
+
/**
 * walks every generated event and pushes a "mirror" version of it into
 * storage.mirrorEventData, applying each configured mirror prop's strategy
 * ("create", "delete", "fill" or "update") to the events it targets;
 * events that no mirror prop targets are pushed as-is
 * @param {Config} config
 * @param {Storage} storage
 * @return {Promise<void>}
 */
async function makeMirror(config, storage) {
	const { mirrorProps } = config;
	const { eventData, mirrorEventData } = storage;
	const now = dayjs();

	for (const oldEvent of eventData) {
		const ageInDays = now.diff(dayjs(oldEvent.time), "day");
		let newEvent;

		for (const mirrorProp in mirrorProps) {
			const { daysUnfilled = 7, events = "*", strategy = "create", values = [] } = mirrorProps[mirrorProp];
			const isTargeted = events === "*" || events.includes(oldEvent.event);
			if (!isTargeted) continue;

			// clone lazily, once per event, the first time any mirror prop applies
			if (!newEvent) newEvent = clone(oldEvent);

			if (strategy === "create") {
				newEvent[mirrorProp] = u.choose(values);
			}
			else if (strategy === "delete") {
				delete newEvent[mirrorProp];
			}
			else if (strategy === "fill") {
				// sufficiently old events also get a value on the ORIGINAL event
				if (ageInDays >= daysUnfilled) oldEvent[mirrorProp] = u.choose(values);
				newEvent[mirrorProp] = u.choose(values);
			}
			else if (strategy === "update") {
				newEvent[mirrorProp] = !oldEvent[mirrorProp] ? u.choose(values) : oldEvent[mirrorProp];
			}
			else {
				throw new Error(`strategy ${strategy} is unknown`);
			}
		}

		await mirrorEventData.hookPush(newEvent ? newEvent : oldEvent);
	}
}
774
+
775
+
776
+ /*
777
+ --------------
778
+ ORCHESTRATORS
779
+ --------------
780
+ */
781
+
782
+
/**
 * a loop that creates users and their events; the loop is inside this function
 *
 * For each of `config.numUsers` users this pushes a profile, one SCD list per
 * key in `config.scdProps`, and events (from funnels when any exist, otherwise
 * single events) into the given storage tables.
 *
 * NOTE: each task is awaited before the next one is queued, so users are
 * generated one at a time regardless of `concurrency`. That keeps draws from
 * the shared seeded random generator in a stable order.
 *
 * @param {Config} config
 * @param {Storage} storage
 * @param {number} [concurrency]
 * @return {Promise<void>}
 */
async function userLoop(config, storage, concurrency = 1) {
	const chance = u.getChance();
	const USER_CONN = pLimit(concurrency);
	const {
		verbose,
		numUsers,
		numEvents,
		isAnonymous,
		hasAvatar,
		hasAnonIds,
		hasSessionIds,
		hasLocation,
		funnels,
		userProps,
		scdProps,
		numDays,
	} = config;
	const { eventData, userProfilesData, scdTableData } = storage;
	const avgEvPerUser = numEvents / numUsers;

	// exactly numUsers iterations (the previous `i = 1; i < numUsers` bound produced one user too few)
	for (let i = 0; i < numUsers; i++) {
		await USER_CONN(async () => {
			userCount++;
			if (verbose) u.progress([["users", userCount], ["events", eventCount]]);
			const userId = chance.guid();
			const user = u.generateUser(userId, { numDays, isAnonymous, hasAvatar, hasAnonIds, hasSessionIds });
			const { distinct_id, created } = user;
			let numEventsPreformed = 0;

			if (hasLocation) {
				const location = u.choose(DEFAULTS.locationsUsers);
				for (const key in location) {
					user[key] = location[key];
				}
			}

			// Profile creation
			const profile = await makeProfile(userProps, user);
			await userProfilesData.hookPush(profile);

			// SCD creation
			const userSCD = {};
			for (const [index, key] of Object.keys(scdProps).entries()) {
				const mutations = chance.integer({ min: 1, max: 10 }); //todo: configurable mutations?
				const changes = await makeSCD(scdProps[key], key, distinct_id, mutations, created);
				userSCD[key] = changes;
				await scdTableData[index].hookPush(changes);
			}

			let numEventsThisUserWillPreform = Math.floor(chance.normal({
				mean: avgEvPerUser,
				dev: avgEvPerUser / u.integer(u.integer(2, 5), u.integer(2, 7))
			}) * 0.714159265359);

			// Power users and low-activity users: both draws always happen, in this order
			if (chance.bool({ likelihood: 20 })) numEventsThisUserWillPreform *= 5;
			if (chance.bool({ likelihood: 15 })) numEventsThisUserWillPreform *= 0.333;
			numEventsThisUserWillPreform = Math.round(numEventsThisUserWillPreform);

			let userFirstEventTime;

			// First funnel logic...
			const firstFunnels = funnels.filter((f) => f.isFirstFunnel).reduce(u.weighFunnels, []);
			const usageFunnels = funnels.filter((f) => !f.isFirstFunnel).reduce(u.weighFunnels, []);
			const userIsBornInDataset = chance.bool({ likelihood: 30 });

			if (firstFunnels.length && userIsBornInDataset) {
				const firstFunnel = chance.pickone(firstFunnels);
				const [data, userConverted] = await makeFunnel(firstFunnel, user, null, profile, userSCD, config);
				userFirstEventTime = dayjs(data[0].time).unix();
				numEventsPreformed += data.length;
				await eventData.hookPush(data);
				if (!userConverted) {
					// users who fail their first funnel generate no further events
					if (verbose) u.progress([["users", userCount], ["events", eventCount]]);
					return;
				}
			} else {
				userFirstEventTime = dayjs(created).unix();
			}

			while (numEventsPreformed < numEventsThisUserWillPreform) {
				if (usageFunnels.length) {
					const currentFunnel = chance.pickone(usageFunnels);
					const [data] = await makeFunnel(currentFunnel, user, userFirstEventTime, profile, userSCD, config);
					numEventsPreformed += data.length;
					await eventData.hookPush(data);
				} else {
					// NOTE(review): isFirstEvent=true pins every such event to userFirstEventTime — confirm this is intended
					const data = await makeEvent(distinct_id, userFirstEventTime, u.choose(config.events), user.anonymousIds, user.sessionIds, {}, config.groupKeys, true);
					numEventsPreformed++;
					await eventData.hookPush(data);
				}
			}

			if (verbose) u.progress([["users", userCount], ["events", eventCount]]);
		});
	}
}
889
+
890
+
891
/**
 * sends the data to mixpanel
 * todo: this needs attention
 *
 * For each dataset (events, user profiles, ad spend, group profiles) this either
 * imports a clone of the in-memory hooked array, or - when `isBATCH_MODE` is set -
 * imports the part files found in the directory of that dataset's write path.
 * When batch mode is on and `config.writeToDisk` is falsy, the part files are
 * removed after the imports finish.
 *
 * @param {Config} config
 * @param {Storage} storage
 * @returns {Promise<Object>} import results keyed by `events`, `users`, `adSpend` and `groups`
 */
async function sendToMixpanel(config, storage) {
	const { adSpendData, eventData, groupProfilesData, userProfilesData } = storage;
	const { token, region, writeToDisk, format } = config;
	const importResults = { events: {}, users: {}, groups: [] };

	/** @type {import('mixpanel-import').Creds} */
	const creds = { token };
	const mpImportFormat = format === "json" ? "jsonl" : "csv";
	/** @type {import('mixpanel-import').Options} */
	const commonOpts = {
		region,
		fixData: true,
		verbose: false,
		forceStream: true,
		strict: false,
		dryRun: false,
		abridged: false,
		fixJson: true,
		showProgress: true,
		streamFormat: mpImportFormat
	};

	// lists the files that sit next to a hooked array's write path and contain `marker`;
	// returns [] when the dataset does not expose getWriteDir() instead of throwing
	const listBatchFiles = async (hooked, marker) => {
		if (typeof hooked?.getWriteDir !== "function") return [];
		// getWriteDir() yields a file path; its parent folder is where the parts live
		const files = await ls(path.dirname(hooked.getWriteDir()));
		return files.filter(f => f.includes(marker));
	};

	// batch mode imports files from disk; otherwise import a copy of the in-memory data
	// (the clone is only made when it is actually going to be used)
	const resolvePayload = async (hooked, marker) => {
		if (isBATCH_MODE) return listBatchFiles(hooked, marker);
		return clone(hooked);
	};

	if (eventData) {
		log(`importing events to mixpanel...\n`);
		const eventDataToImport = await resolvePayload(eventData, '-EVENTS-');
		const imported = await mp(creds, eventDataToImport, {
			recordType: "event",
			...commonOpts,
		});
		log(`\tsent ${comma(imported.success)} events\n`);
		importResults.events = imported;
	}

	if (userProfilesData) {
		log(`importing user profiles to mixpanel...\n`);
		const userProfilesToImport = await resolvePayload(userProfilesData, '-USERS-');
		const imported = await mp(creds, userProfilesToImport, {
			recordType: "user",
			...commonOpts,
		});
		log(`\tsent ${comma(imported.success)} user profiles\n`);
		importResults.users = imported;
	}

	if (adSpendData) {
		log(`importing ad spend data to mixpanel...\n`);
		const adSpendDataToImport = await resolvePayload(adSpendData, '-AD-SPEND-');
		const imported = await mp(creds, adSpendDataToImport, {
			recordType: "event",
			...commonOpts,
		});
		log(`\tsent ${comma(imported.success)} ad spend events\n`);
		importResults.adSpend = imported;
	}

	// a missing group collection is treated as "no groups" rather than a crash
	for (const groupEntity of groupProfilesData ?? []) {
		const groupKey = groupEntity?.groupKey;
		log(`importing ${groupKey} profiles to mixpanel...\n`);
		const groupProfilesToImport = await resolvePayload(groupEntity, `-GROUPS-${groupKey}`);
		const imported = await mp({ token, groupKey }, groupProfilesToImport, {
			recordType: "group",
			...commonOpts,
		});
		log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);
		importResults.groups.push(imported);
	}

	//if we are in batch mode, we need to delete the files
	if (!writeToDisk && isBATCH_MODE) {
		const anchorPath = eventData?.getWriteDir?.() || userProfilesData?.getWriteDir?.();
		if (anchorPath) {
			const listDir = await ls(path.dirname(anchorPath));
			const files = listDir.filter(f => f.includes('-EVENTS-') || f.includes('-USERS-') || f.includes('-AD-SPEND-') || f.includes('-GROUPS-'));
			for (const file of files) {
				await rm(file);
			}
		}
	}

	return importResults;
}
999
+
1000
+ /*
1001
+ ----
1002
+ META
1003
+ ----
1004
+ */
1005
+
1006
+
1007
/**
 * ensures that the config is valid and has all the necessary fields
 * also adds some defaults
 *
 * Every recognised field is read from `config` (falling back to a default when it is
 * `undefined`), normalised, and written back onto the same `config` object, which is
 * then returned. Side effect: a positive `batchSize` is copied to the module-level
 * `BATCH_SIZE`.
 *
 * @param {Config} config
 * @returns {Config} the same object, mutated in place
 */
function validateDungeonConfig(config) {
	const chance = u.getChance();
	// factories keep the defaults lazy: they are only built when actually needed
	const makeDefaultSuperProps = () => ({ luckyNumber: [2, 2, 4, 4, 42, 42, 42, 2, 2, 4, 4, 42, 42, 42, 420] });
	const makeDefaultUserProps = () => ({ spiritAnimal: chance.animal.bind(chance) });

	let {
		seed,
		numEvents = 100_000,
		numUsers = 1000,
		numDays = 30,
		epochStart = 0,
		epochEnd = dayjs().unix(),
		events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }],
		superProps = makeDefaultSuperProps(),
		funnels = [],
		userProps = makeDefaultUserProps(),
		scdProps = {},
		mirrorProps = {},
		groupKeys = [],
		groupProps = {},
		lookupTables = [],
		hasAnonIds = false,
		hasSessionIds = false,
		format = "csv",
		token = null,
		region = "US",
		writeToDisk = false,
		verbose = false,
		makeChart = false,
		soup = {},
		hook = (record) => record,
		hasAdSpend = false,
		hasCampaigns = false,
		hasLocation = false,
		hasAvatar = false,
		isAnonymous = false,
		hasBrowser = false,
		hasAndroidDevices = false,
		hasDesktopDevices = false,
		hasIOSDevices = false,
		name = "",
		batchSize = 500_000,
		concurrency = 500
	} = config;

	//ensuring default for deep objects
	// destructuring defaults only apply to `undefined`; an explicit null (or other
	// falsy value) must fall back to the defaults as well
	if (!superProps) superProps = makeDefaultSuperProps();
	if (!userProps) userProps = makeDefaultUserProps();

	//setting up "TIME"
	if (epochStart && !numDays) numDays = dayjs.unix(epochEnd).diff(dayjs.unix(epochStart), "day");
	if (!epochStart && numDays) epochStart = dayjs.unix(epochEnd).subtract(numDays, "day").unix();

	config.simulationName = name || makeName();
	config.name = config.simulationName;

	//max batch size
	if (batchSize > 0) BATCH_SIZE = batchSize;

	//event validation
	// runs BEFORE funnel inference so that inferFunnels() reads the validated event list
	events = u.validateEventConfig(events);

	// FUNNEL INFERENCE
	if (!funnels || !funnels.length) {
		funnels = inferFunnels(events);
	}

	// write every normalised value back onto the caller's config object
	Object.assign(config, {
		concurrency,
		funnels,
		batchSize,
		seed,
		numEvents,
		numUsers,
		numDays,
		epochStart,
		epochEnd,
		events,
		superProps,
		userProps,
		scdProps,
		mirrorProps,
		groupKeys,
		groupProps,
		lookupTables,
		hasAnonIds,
		hasSessionIds,
		format,
		token,
		region,
		writeToDisk,
		verbose,
		makeChart,
		soup,
		hook,
		hasAdSpend,
		hasCampaigns,
		hasLocation,
		hasAvatar,
		isAnonymous,
		hasBrowser,
		hasAndroidDevices,
		hasDesktopDevices,
		hasIOSDevices
	});

	return config;
}
1124
+
1125
/**
 * our meta programming function which lets you mutate items as they are pushed into an array
 * it also does batching and writing to disk
 * it kind of is a class - as it produces new objects - but it's not a class
 *
 * The returned value is `arr` itself, decorated with `hookPush`, `flush`,
 * `getWriteDir`, `getWritePath` and every extra key passed in `opts`.
 *
 * @param {Object} arr the array to decorate
 * @param {hookArrayOptions} opts `hook`, `type`, `filepath`, `format`, `concurrency`; any other keys are copied onto the array and passed to the hook
 * @returns {Promise<hookArray>}
 */
async function makeHookArray(arr = [], opts = {}) {
	const { hook = a => a, type = "", filepath = "./defaultFile", format = "csv", concurrency = 1, ...rest } = opts;
	const FILE_CONN = pLimit(concurrency); // concurrent file writes
	let batch = 0;
	// prefer ./data when it exists, otherwise write next to the current working directory
	const dataFolder = path.resolve("./data");
	const writeDir = existsSync(dataFolder) ? dataFolder : path.resolve("./");

	/** path of the file the next write goes to (numbered part file in batch mode) */
	function getWritePath() {
		if (isBATCH_MODE) {
			return path.join(writeDir, `${filepath}-part-${batch.toString()}.${format}`);
		}
		return path.join(writeDir, `${filepath}.${format}`);
	}

	/** un-numbered write path; callers take its parent folder to locate part files */
	function getWriteDir() {
		return path.join(writeDir, `${filepath}.${format}`);
	}

	/**
	 * runs each record through the hook and appends the result(s) to the array;
	 * once the array grows past BATCH_SIZE it switches to batch mode and writes a part file
	 * @returns {Promise<any>} the write result when a batch was written, otherwise false
	 */
	async function transformThenPush(item) {
		if (item === null || item === undefined) return false;
		if (typeof item === 'object' && Object.keys(item).length === 0) return false;

		const records = Array.isArray(item) ? item : [item];
		for (const record of records) {
			try {
				const enriched = await hook(record, type, rest);
				if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
				else arr.push(enriched);
			} catch (e) {
				// a failing hook must not lose data: keep the untransformed record
				console.error(`\n\nyour hook had an error\n\n`, e);
				arr.push(record);
			}
		}

		if (arr.length > BATCH_SIZE) {
			isBATCH_MODE = true;
			batch++;
			const writePath = getWritePath();
			// detach the batch synchronously: records pushed by concurrent callers while the
			// write is in flight stay in `arr` for the next part instead of being written
			// twice or wiped when the written buffer is truncated
			const chunk = arr.splice(0, arr.length);
			return FILE_CONN(() => writeToDisk(chunk, { writePath }));
		}
		return false;
	}

	/** streams `data` to `options.writePath` in the configured format */
	async function writeToDisk(data, options) {
		const { writePath } = options;
		let writeResult;
		if (VERBOSE) log(`\n\n\twriting ${writePath}\n\n`);
		switch (format) {
			case "csv":
				writeResult = await u.streamCSV(writePath, data);
				break;
			case "json":
				writeResult = await u.streamJSON(writePath, data);
				break;
			default:
				throw new Error(`format ${format} is not supported`);
		}
		if (isBATCH_MODE) data.length = 0; // free up memory for batch mode
		return writeResult;
	}

	/** writes whatever is still buffered; in batch mode the buffer is emptied as well */
	async function flush() {
		if (arr.length === 0) return;
		batch++;
		const writePath = getWritePath();
		// outside batch mode the array itself is written so its contents stay available in memory
		const payload = isBATCH_MODE ? arr.splice(0, arr.length) : arr;
		await FILE_CONN(() => writeToDisk(payload, { writePath }));
	}

	const enrichedArray = arr;

	enrichedArray.hookPush = transformThenPush;
	enrichedArray.flush = flush;
	enrichedArray.getWriteDir = getWriteDir;
	enrichedArray.getWritePath = getWritePath;

	for (const key in rest) {
		enrichedArray[key.toString()] = rest[key];
	}

	return enrichedArray;
}
1232
+
1233
+
1234
/**
 * create funnels out of random events
 *
 * Produces, in order: one single-step "first funnel" per event flagged `isFirstEvent`,
 * one sequential funnel containing every other event, and then additional funnels
 * built from shuffled subsets of those other events.
 *
 * @param {EventConfig[]} events
 * @returns {Funnel[]}
 */
function inferFunnels(events) {
	// split the event names into "first" events and regular usage events
	const firstEvents = [];
	const usageEvents = [];
	for (const candidate of events) {
		if (candidate.isFirstEvent) firstEvents.push(candidate.event);
	}
	for (const candidate of events) {
		if (!candidate.isFirstEvent) usageEvents.push(candidate.event);
	}

	/** @type {Funnel} */
	const funnelTemplate = {
		sequence: [],
		conversionRate: 50,
		order: 'sequential',
		requireRepeats: false,
		props: {},
		timeToConvert: 1,
		isFirstFunnel: false,
		weight: 1
	};

	const createdFunnels = [];

	// every first event becomes its own always-converting funnel
	firstEvents.forEach((eventName) => {
		const starter = { ...clone(funnelTemplate), sequence: [eventName], isFirstFunnel: true, conversionRate: 100 };
		createdFunnels.push(starter);
	});

	//at least one funnel with all usage events
	const everything = { ...clone(funnelTemplate), sequence: usageEvents };
	createdFunnels.push(everything);

	//for the rest, make random funnels
	const numFunnelsToCreate = Math.ceil(usageEvents.length);
	let remaining = numFunnelsToCreate - 1;
	while (remaining > 0) {
		/** @type {Funnel} */
		const randomFunnel = { ...clone(funnelTemplate) };
		randomFunnel.conversionRate = u.integer(25, 75);
		randomFunnel.timeToConvert = u.integer(1, 10);
		randomFunnel.weight = u.integer(1, 10);
		const shuffled = u.shuffleArray(usageEvents);
		const howMany = u.integer(2, usageEvents.length);
		randomFunnel.sequence = shuffled.slice(0, howMany);
		randomFunnel.order = 'random';
		createdFunnels.push(randomFunnel);
		remaining--;
	}

	return createdFunnels;
}
1279
+
1280
+
1281
+ /*
1282
+ ----
1283
+ CLI
1284
+ ----
1285
+ */
1286
+
1287
// Entry point: when this file is executed directly it behaves as a CLI (load a config,
// apply command-line overrides, run main(), print a summary); when it is required it
// exports main() with the generators / orchestrators / meta helpers attached.
if (require.main === module) {
	isCLI = true;
	const args = /** @type {Config} */ (getCliParams());
	let { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false, hasSessionIds, hasAnonIds } = args;
	const suppliedConfig = args._[0];

	//if the user specifies an separate config file
	let config = null;
	if (suppliedConfig) {
		console.log(`using ${suppliedConfig} for data\n`);
		config = require(path.resolve(suppliedConfig));
	}
	else {
		if (complex) {
			console.log(`... using default COMPLEX configuration [everything] ...\n`);
			console.log(`... for more simple data, don't use the --complex flag ...\n`);
			console.log(`... or specify your own js config file (see docs or --help) ...\n`);
			config = require(path.resolve(__dirname, "./schemas/complex.js"));
		}
		else {
			console.log(`... using default SIMPLE configuration [events + users] ...\n`);
			console.log(`... for more complex data, use the --complex flag ...\n`);
			config = require(path.resolve(__dirname, "./schemas/simple.js"));
		}
	}

	//override config with cli params
	if (token) config.token = token;
	if (seed) config.seed = seed;
	// a "csv" cli format does not override a config file that asks for json
	if (format === "csv" && config.format === "json") format = "json";
	if (format) config.format = format;
	if (numDays) config.numDays = numDays;
	if (numUsers) config.numUsers = numUsers;
	if (numEvents) config.numEvents = numEvents;
	if (region) config.region = region;
	if (writeToDisk) config.writeToDisk = writeToDisk;
	// the flag may arrive as the string 'false', which is truthy
	if (writeToDisk === 'false') config.writeToDisk = false;
	if (hasSessionIds) config.hasSessionIds = hasSessionIds;
	if (hasAnonIds) config.hasAnonIds = hasAnonIds;
	config.verbose = true;

	main(config)
		.then((data) => {
			//todo: rethink summary
			log(`-----------------SUMMARY-----------------`);
			const d = { success: 0, bytes: 0 };
			const darr = [d];
			const { events = d, groups = darr, users = d } = data?.importResults || {};
			const files = data?.files;
			const folder = files?.[0]?.split(path.basename(files?.[0]))?.shift();
			// missing counters count as 0 so one absent field cannot turn the totals into NaN
			const groupBytes = groups.reduce((acc, group) => acc + (group.bytes || 0), 0);
			const groupSuccess = groups.reduce((acc, group) => acc + (group.success || 0), 0);
			const bytes = (events.bytes || 0) + groupBytes + (users.bytes || 0);
			const stats = {
				events: comma(events.success || 0),
				users: comma(users.success || 0),
				groups: comma(groupSuccess || 0),
				bytes: bytesHuman(bytes || 0),
			};
			if (bytes > 0) console.table(stats);
			log(`\nfiles written to ${folder || "no where; we didn't write anything"} ...`);
			log(" " + files?.flat().join("\n "));
			log(`\n----------------SUMMARY-----------------\n\n\n`);
		})
		.catch((e) => {
			log(`------------------ERROR------------------`);
			console.error(e);
			log(`------------------ERROR------------------`);
			// report the failure to the calling shell instead of exiting with code 0
			process.exitCode = 1;
		})
		.finally(() => {
			log("enjoy your data! :)");
			u.openFinder(path.resolve("./data"));
		});
} else {
	main.generators = { makeEvent, makeFunnel, makeProfile, makeSCD, makeAdSpend, makeMirror };
	main.orchestrators = { userLoop, validateDungeonConfig, sendToMixpanel };
	main.meta = { inferFunnels, hookArray: makeHookArray };
	module.exports = main;
}
1371
+
1372
+
1373
+ /*
1374
+ ----
1375
+ HELPERS
1376
+ ----
1377
+ */
1378
+
1379
/**
 * console.log wrapper that only prints when VERBOSE is set.
 * Occurrences of the current working directory inside string arguments are
 * shortened to "." so printed paths read as relative; other arguments pass through untouched.
 * @param {...any} args values to print
 */
function log(...args) {
	if (!VERBOSE) return;
	const cwd = process.cwd(); // Get the current working directory

	// plain-text replacement: the directory is matched literally, so characters such as
	// "(", "+", "." or "\" in the path cannot be misread as regular-expression syntax
	const printable = args.map((arg) => {
		if (typeof arg !== 'string' || !cwd) return arg;
		return arg.replaceAll(cwd, ".");
	});

	console.log(...printable);
}
1390
+
1391
/**
 * Forwards a usage event to the `metrics` tracker, except when NODE_ENV is 'test'.
 * @param {string} name event name
 * @param {Object} props event properties
 * @param {...any} rest extra arguments handed to `metrics` unchanged
 */
function track(name, props, ...rest) {
	const isTestRun = process.env.NODE_ENV === 'test';
	if (!isTestRun) {
		metrics(name, props, ...rest);
	}
}
1395
+
1396
+
1397
+ /** @typedef {import('./types.js').Config} Config */
1398
+ /** @typedef {import('./types.js').AllData} AllData */
1399
+ /** @typedef {import('./types.js').EventConfig} EventConfig */
1400
+ /** @typedef {import('./types.js').Funnel} Funnel */
1401
+ /** @typedef {import('./types.js').Person} Person */
1402
+ /** @typedef {import('./types.js').SCDSchema} SCDSchema */
1403
+ /** @typedef {import('./types.js').UserProfile} UserProfile */
1404
+ /** @typedef {import('./types.js').EventSchema} EventSchema */
1405
+ /** @typedef {import('./types.js').Storage} Storage */
1406
+ /** @typedef {import('./types.js').Result} Result */
1407
+ /** @typedef {import('./types.js').ValueValid} ValueValid */
1408
+ /** @typedef {import('./types.js').HookedArray} hookArray */
1409
+ /** @typedef {import('./types.js').hookArrayOptions} hookArrayOptions */
1410
+ /** @typedef {import('./types.js').GroupProfileSchema} GroupProfile */