make-mp-data 1.4.5 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/core/index.js DELETED
@@ -1,1013 +0,0 @@
1
- #! /usr/bin/env node
2
-
3
- /*
4
- make fake mixpanel data easily!
5
- by AK
6
- ak@mixpanel.com
7
- */
8
-
9
-
10
- //!feature: mirror strategies
11
- //!feature: fixedTimeFunnel? if set this funnel will occur for all users at the same time ['cards charged', 'charge complete']
12
- //!feature: churn ... is churnFunnel, possible to return, etc
13
- //!bug: not writing adspend CSV
14
- //!bug: using --mc flag reverts to --complex for some reason
15
-
16
- //todo: send SCD data to mixpanel (blocked on dev)
17
- //todo: send and map lookup tables to mixpanel (also blocked on dev)
18
-
19
- /** @typedef {import('../types').Config} Config */
20
- /** @typedef {import('../types').EventConfig} EventConfig */
21
- /** @typedef {import('../types').Funnel} Funnel */
22
- /** @typedef {import('../types').Person} Person */
23
- /** @typedef {import('../types').SCDSchema} SCDTableRow */
24
- /** @typedef {import('../types').UserProfile} UserProfile */
25
- /** @typedef {import('../types').EventSchema} EventSpec */
26
-
27
- const dayjs = require("dayjs");
28
- const utc = require("dayjs/plugin/utc");
29
- dayjs.extend(utc);
30
- const NOW = dayjs('2024-02-02').unix(); //this is a FIXED POINT and we will shift it later
31
- global.NOW = NOW;
32
-
33
- const os = require("os");
34
- const path = require("path");
35
- const { comma, bytesHuman, makeName, md5, clone, tracker, uid } = require("ak-tools");
36
- const { generateLineChart } = require('./chart.js');
37
- const { version } = require('../package.json');
38
- const mp = require("mixpanel-import");
39
- const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41", os.userInfo().username);
40
-
41
-
42
- const u = require("./utils.js");
43
- const getCliParams = require("./cli.js");
44
- const { campaigns, devices, locations } = require('./defaults.js');
45
-
46
- let VERBOSE = false;
47
- let isCLI = false;
48
- /** @type {Config} */
49
- let CONFIG;
50
- let CAMPAIGNS;
51
- let DEFAULTS;
52
- require('dotenv').config();
53
-
54
-
55
/**
 * Forwards a usage-tracking call to the module-level `metrics` tracker,
 * unless the process is running with NODE_ENV set to "test".
 * @param {string} name event name passed through to `metrics`
 * @param {Object} props properties passed through to `metrics`
 * @param {...any} rest any further arguments, forwarded untouched
 */
function track(name, props, ...rest) {
	const isTestRun = process.env.NODE_ENV === 'test';
	if (!isTestRun) {
		metrics(name, props, ...rest);
	}
}
59
-
60
-
61
-
62
/**
 * generates fake mixpanel data
 *
 * Steps, in order:
 *  1. seed the shared chance instance (env SEED > config.seed > built-in default)
 *  2. fill in defaults for every missing config key and write the resolved values
 *     back onto `config` (the passed-in object IS mutated)
 *  3. simulate users: profile, SCD rows, an optional "first" funnel, then usage funnels
 *  4. build ad spend rows, group profiles and lookup tables
 *  5. shift all event times from the fixed NOW anchor to the real current time
 *  6. optionally draw a chart, build mirror data, write files and/or import to mixpanel
 *
 * @param {Config} config
 * @returns {Promise<Object>} the generated tables (`eventData`, `userProfilesData`,
 *   `scdTableData`, `groupProfilesData`, `lookupTableData`, `mirrorEventData`,
 *   `adSpendData`) plus `importResults` and `files`
 */
async function main(config) {

	//seed the random number generator
	// ^ this is critical; same seed = same data; seed can be passed in as an env var or in the config
	const seedWord = process.env.SEED || config.seed || "hello friend!";
	config.seed = seedWord;
	u.initChance(seedWord);
	const chance = u.getChance(); // ! this is the only safe way to get the chance instance
	let {
		seed,
		numEvents = 100000,
		numUsers = 1000,
		numDays = 30,
		epochStart = 0,
		epochEnd = dayjs().unix(),
		events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }],
		superProps = { luckyNumber: [2, 2, 4, 4, 42, 42, 42, 2, 2, 4, 4, 42, 42, 42, 420] },
		funnels = [],
		userProps = {
			spiritAnimal: chance.animal.bind(chance),
		},
		scdProps = {},
		mirrorProps = {},
		groupKeys = [],
		groupProps = {},
		lookupTables = [],
		anonIds = false,
		sessionIds = false,
		format = "csv",
		token = null,
		region = "US",
		writeToDisk = false,
		verbose = false,
		makeChart = false,
		soup = {},
		hook = (record) => record,
		hasAdSpend = false,
		hasCampaigns = false,
		hasLocation = false,
		isAnonymous = false,
		hasBrowser = false,
		hasAndroidDevices = false,
		hasDesktopDevices = false,
		hasIOSDevices = false
	} = config;

	// NOTE: there used to be two conditional pre-assignments of config.superProps and
	// config.userProps here; one of them tested `Object.keys(...)` (always truthy) and
	// both were overwritten by the write-back below, so they have been dropped.

	config.simulationName = makeName();
	const { simulationName } = config;

	// derive whichever of (epochStart, numDays) is missing from the other
	if (epochStart && !numDays) numDays = dayjs.unix(epochEnd).diff(dayjs.unix(epochStart), "day");
	if (!epochStart && numDays) epochStart = dayjs.unix(epochEnd).subtract(numDays, "day").unix();

	// write every resolved value back so downstream helpers can read it from the config
	Object.assign(config, {
		seed,
		numEvents,
		numUsers,
		numDays,
		epochStart,
		epochEnd,
		events,
		superProps,
		funnels,
		userProps,
		scdProps,
		mirrorProps,
		groupKeys,
		groupProps,
		lookupTables,
		anonIds,
		sessionIds,
		format,
		token,
		region,
		writeToDisk,
		verbose,
		makeChart,
		soup,
		hook,
		hasAdSpend,
		hasCampaigns,
		hasLocation,
		isAnonymous,
		hasBrowser,
		hasAndroidDevices,
		hasDesktopDevices,
		hasIOSDevices,
	});

	//event validation
	const validatedEvents = u.validateEventConfig(events);
	events = validatedEvents;
	config.events = validatedEvents;

	//globals
	global.MP_SIMULATION_CONFIG = config;
	CONFIG = config;
	VERBOSE = verbose;
	CAMPAIGNS = campaigns;
	DEFAULTS = {
		locations: u.pickAWinner(locations, 0),
		iOSDevices: u.pickAWinner(devices.iosDevices, 0),
		androidDevices: u.pickAWinner(devices.androidDevices, 0),
		desktopDevices: u.pickAWinner(devices.desktopDevices, 0),
		browsers: u.pickAWinner(devices.browsers, 0),
		campaigns: u.pickAWinner(campaigns, 0),
	};

	const runId = uid(42);
	const trackingParams = { runId, seed, numEvents, numUsers, numDays, anonIds, sessionIds, format, targetToken: token, region, writeToDisk, isCLI, version };
	track('start simulation', trackingParams);

	log(`------------------SETUP------------------`);
	log(`\nyour data simulation will heretofore be known as: \n\n\t${simulationName.toUpperCase()}...\n`);
	log(`and your configuration is:\n\n`, JSON.stringify({ seed, numEvents, numUsers, numDays, format, token, region, writeToDisk, anonIds, sessionIds }, null, 2));
	log(`------------------SETUP------------------`, "\n");

	//setup all the data structures we will push into
	const eventData = u.hookArray([], { hook, type: "event", config });
	const userProfilesData = u.hookArray([], { hook, type: "user", config });
	const adSpendData = u.hookArray([], { hook, type: "ad-spend", config });
	const scdTableKeys = Object.keys(scdProps);
	const scdTableData = [];
	for (const [index, key] of scdTableKeys.entries()) {
		scdTableData[index] = u.hookArray([], { hook, type: "scd", config, scdKey: key });
	}
	const groupProfilesData = u.hookArray([], { hook, type: "group", config });
	const lookupTableData = u.hookArray([], { hook, type: "lookup", config });
	const avgEvPerUser = Math.ceil(numEvents / numUsers);

	// if no funnels, make some out of events...
	if (!funnels || !funnels.length) {
		funnels = u.inferFunnels(events);
		config.funnels = funnels;
		CONFIG = config;
	}

	//user loop
	log(`---------------SIMULATION----------------`, "\n\n");
	loopUsers: for (let i = 1; i < numUsers + 1; i++) {
		u.progress([["users", i], ["events", eventData.length]]);
		const userId = chance.guid();
		const user = u.person(userId, numDays, isAnonymous);
		const { distinct_id, created } = user;
		let numEventsPreformed = 0;

		if (hasLocation) {
			const location = u.choose(clone(DEFAULTS.locations()).map(l => { delete l.country; return l; }));
			for (const key in location) {
				user[key] = location[key];
			}
		}

		// profile creation
		const profile = makeProfile(userProps, user);
		userProfilesData.hookPush(profile);

		//scd creation
		/** @type {Record<string, SCDTableRow[]>} */
		// @ts-ignore
		const userSCD = {};
		for (const [index, key] of scdTableKeys.entries()) {
			const mutations = chance.integer({ min: 1, max: 10 });
			const changes = makeSCD(scdProps[key], key, distinct_id, mutations, created);
			// @ts-ignore
			userSCD[key] = changes;
			scdTableData[index].hookPush(changes);
		}

		let numEventsThisUserWillPreform = Math.floor(chance.normal({
			mean: avgEvPerUser,
			dev: avgEvPerUser / u.integer(u.integer(2, 5), u.integer(2, 7))
		}) * 0.714159265359);

		// power users do 5x more events
		if (chance.bool({ likelihood: 20 })) numEventsThisUserWillPreform *= 5;

		// low-engagement users do 1/3 as many events
		if (chance.bool({ likelihood: 15 })) numEventsThisUserWillPreform *= 0.333;

		numEventsThisUserWillPreform = Math.round(numEventsThisUserWillPreform);

		let userFirstEventTime;

		//first funnel
		const firstFunnels = funnels.filter((f) => f.isFirstFunnel).reduce(u.weighFunnels, []);
		const usageFunnels = funnels.filter((f) => !f.isFirstFunnel).reduce(u.weighFunnels, []);
		const userIsBornInDataset = chance.bool({ likelihood: 30 });
		if (firstFunnels.length && userIsBornInDataset) {
			/** @type {Funnel} */
			const firstFunnel = chance.pickone(firstFunnels);

			const [data, userConverted] = makeFunnel(firstFunnel, user, profile, userSCD, null, config);
			userFirstEventTime = dayjs(data[0].time).unix();
			numEventsPreformed += data.length;
			eventData.hookPush(data);
			// users who drop out of their first funnel never come back
			if (!userConverted) continue loopUsers;
		}

		// the length guard matters: with no usage funnels nothing can ever advance
		// numEventsPreformed, so the loop would otherwise spin forever
		while (usageFunnels.length && numEventsPreformed < numEventsThisUserWillPreform) {
			/** @type {Funnel} */
			const currentFunnel = chance.pickone(usageFunnels);
			const [data] = makeFunnel(currentFunnel, user, profile, userSCD, userFirstEventTime, config);
			numEventsPreformed += data.length;
			eventData.hookPush(data);
		}
		// end individual user loop
	}

	if (hasAdSpend) {
		const days = u.datesBetween(epochStart, epochEnd);
		for (const day of days) {
			const dailySpendData = makeAdSpend(day);
			for (const spendEvent of dailySpendData) {
				adSpendData.hookPush(spendEvent);
			}
		}
	}

	//flatten SCD tables
	scdTableData.forEach((table, index) => scdTableData[index] = table.flat());

	log("\n");

	// make group profiles
	for (const groupPair of groupKeys) {
		const groupKey = groupPair[0];
		const groupCardinality = groupPair[1];
		const groupProfiles = [];
		for (let i = 1; i < groupCardinality + 1; i++) {
			u.progress([["groups", i]]);
			const group = {
				[groupKey]: i,
				...makeProfile(groupProps[groupKey])
			};
			group["distinct_id"] = i;
			groupProfiles.push(group);
		}
		groupProfilesData.hookPush({ key: groupKey, data: groupProfiles });
	}
	log("\n");

	// make lookup tables
	for (const lookupTable of lookupTables) {
		const { key, entries, attributes } = lookupTable;
		const data = [];
		for (let i = 1; i < entries + 1; i++) {
			u.progress([["lookups", i]]);
			const item = {
				[key]: i,
				...makeProfile(attributes),
			};
			data.push(item);
		}
		lookupTableData.hookPush({ key, data });
	}

	// SHIFT TIME
	// events were generated relative to the fixed NOW anchor; move them to wall-clock time
	const actualNow = dayjs();
	const fixedNow = dayjs.unix(global.NOW);
	const timeShift = actualNow.diff(fixedNow, "second");

	eventData.forEach((event) => {
		try {
			const newTime = dayjs(event.time).add(timeShift, "second");
			event.time = newTime.toISOString();
			// NOTE(review): this callback used to end with `event = {}` for events outside
			// [epochStart, epochEnd - 1h]. That only rebound the callback parameter and never
			// changed the array, so it was dead code. Out-of-window events are (and always
			// were) kept; actually dropping them would change the generated output.
		}
		catch (e) {
			//noop: records without a usable time are left untouched
		}
	});

	// draw charts
	if (makeChart) {
		const bornEvents = config.events?.filter((e) => e.isFirstEvent)?.map(e => e.event) || [];
		const bornFunnels = config.funnels?.filter((f) => f.isFirstFunnel)?.map(f => f.sequence[0]) || [];
		const bornBehaviors = [...bornEvents, ...bornFunnels];
		await generateLineChart(eventData, bornBehaviors, makeChart);
	}

	// create mirrorProps
	let mirrorEventData = [];
	const mirrorPropKeys = Object.keys(mirrorProps);
	if (mirrorPropKeys.length) {
		mirrorEventData = clone(eventData);
		for (const row of mirrorEventData) {
			for (const key of mirrorPropKeys) {
				if (mirrorProps[key]?.events?.includes(row?.event)) row[key] = hook(u.choose(mirrorProps[key]?.values), "mirror", { config, row, key });
				if (mirrorProps[key]?.events === "*") row[key] = hook(u.choose(mirrorProps[key]?.values), "mirror", { config, row, key });
			}
		}
	}

	const { eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder, adSpendFiles } =
		u.buildFileNames(config);
	// each pair is [list of file paths, list of tables]; they are matched up by index
	const pairs = [
		[eventFiles, [eventData]],
		[userFiles, [userProfilesData]],
		[adSpendFiles, [adSpendData]],
		[scdFiles, scdTableData],
		[groupFiles, groupProfilesData],
		[lookupFiles, lookupTableData],
		[mirrorFiles, [mirrorEventData]],
	];
	log("\n");
	log(`---------------SIMULATION----------------`, "\n");

	// nothing to write and nowhere to send: hand the in-memory tables straight back
	if (!writeToDisk && !token) {
		track('end simulation', trackingParams);
		return {
			eventData,
			userProfilesData,
			scdTableData,
			groupProfilesData,
			lookupTableData,
			mirrorEventData,
			adSpendData, // previously missing from this return, but present in the one below
			importResults: {},
			files: []
		};
	}
	log(`-----------------WRITES------------------`, `\n\n`);

	const writeFilePromises = [];
	if (writeToDisk) {
		if (verbose) log(`writing files... for ${simulationName}`);
		for (const [paths, data] of pairs) {
			if (!data.length) continue;
			for (const [index, filePath] of paths.entries()) {
				//group + lookup tables are structured differently ({ key, data } wrappers)
				const TABLE = data?.[index]?.["key"] ? data[index].data : data[index];

				log(`\twriting ${filePath}`);
				//if it's a lookup table, it's always a CSV
				if (format === "csv" || filePath.includes("-LOOKUP.csv")) {
					writeFilePromises.push(u.streamCSV(filePath, TABLE));
				}
				else {
					writeFilePromises.push(u.streamJSON(filePath, TABLE));
				}
			}
		}
	}
	await Promise.all(writeFilePromises);

	const importResults = { events: {}, users: {}, groups: [] };

	//send to mixpanel
	if (token) {
		/** @type {import('mixpanel-import').Creds} */
		const creds = { token };
		/** @type {import('mixpanel-import').Options} */
		const commonOpts = {
			region,
			fixData: true,
			verbose: false,
			forceStream: true,
			strict: false, //! sometimes we get events in the future... it happens
			dryRun: false,
			abridged: false,
			fixJson: true,
			showProgress: true
		};

		if (eventData) {
			log(`importing events to mixpanel...\n`);
			const imported = await mp(creds, clone(eventData), {
				recordType: "event",
				...commonOpts,
			});
			log(`\tsent ${comma(imported.success)} events\n`);
			importResults.events = imported;
		}
		if (userProfilesData && userProfilesData.length) {
			log(`importing user profiles to mixpanel...\n`);
			const imported = await mp(creds, clone(userProfilesData), {
				recordType: "user",
				...commonOpts,
			});
			log(`\tsent ${comma(imported.success)} user profiles\n`);
			importResults.users = imported;
		}
		if (adSpendData && adSpendData.length) {
			log(`importing ad spend data to mixpanel...\n`);
			const imported = await mp(creds, clone(adSpendData), {
				recordType: "event",
				...commonOpts,
			});
			log(`\tsent ${comma(imported.success)} ad spend events\n`);
			importResults.adSpend = imported;
		}
		if (groupProfilesData) {
			for (const groupProfiles of groupProfilesData) {
				const groupKey = groupProfiles.key;
				const data = groupProfiles.data;
				log(`importing ${groupKey} profiles to mixpanel...\n`);
				const imported = await mp({ token, groupKey }, clone(data), {
					recordType: "group",
					...commonOpts,
				});
				log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);

				importResults.groups.push(imported);
			}
		}
	}
	log(`\n-----------------WRITES------------------`, "\n");
	track('end simulation', trackingParams);

	return {
		importResults,
		// `folder` must stay last: the CLI entry point pops it off this list
		files: [eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder],
		eventData,
		userProfilesData,
		scdTableData,
		groupProfilesData,
		lookupTableData,
		mirrorEventData,
		adSpendData
	};
}
510
-
511
-
512
-
513
-
514
-
515
-
516
/**
 * Builds one event record for a user.
 *
 * The record is assembled in a fixed order (event/source, time, ids, custom
 * properties, default properties, group keys) and finally stamped with an
 * `insert_id` that is the md5 of the record as it stands at that point.
 *
 * @param {string} distinct_id
 * @param {string[]} anonymousIds
 * @param {string[]} sessionIds
 * @param {number} earliestTime unix seconds; events are not placed before this
 * @param {EventConfig} chosenEvent
 * @param {Object} superProps
 * @param {Object} groupKeys iterable of [groupKey, cardinality, eventNames?] tuples
 * @param {Boolean} isFirstEvent=false
 */
function makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, chosenEvent, superProps, groupKeys, isFirstEvent = false) {
	const chance = u.getChance();
	const { mean = 0, deviation = 2, peaks = 5 } = CONFIG.soup;
	const { hasAndroidDevices, hasBrowser, hasCampaigns, hasDesktopDevices, hasIOSDevices, hasLocation } = CONFIG;

	//event model
	const record = {
		event: chosenEvent.event,
		source: "dm4",
	};

	// pools of built-in ("default") properties, gated by the config flags
	const builtIns = {};
	const deviceGroups = [];
	if (hasLocation) {
		builtIns.location = clone(DEFAULTS.locations()).map((loc) => {
			delete loc.country_code;
			return loc;
		});
	}
	if (hasBrowser) builtIns.browser = DEFAULTS.browsers();
	if (hasAndroidDevices) deviceGroups.push(DEFAULTS.androidDevices());
	if (hasIOSDevices) deviceGroups.push(DEFAULTS.iOSDevices());
	if (hasDesktopDevices) deviceGroups.push(DEFAULTS.desktopDevices());
	// we don't always have campaigns, because of attribution
	if (hasCampaigns && chance.bool({ likelihood: 25 })) builtIns.campaigns = DEFAULTS.campaigns();
	const allDevices = deviceGroups.flat();
	if (allDevices.length) builtIns.device = allDevices;

	//event time: a start bound past the fixed NOW is pulled back to two days before it
	let startBound = earliestTime;
	if (startBound > NOW) {
		startBound = dayjs.unix(NOW).subtract(2, 'd').unix();
	}
	record.time = isFirstEvent
		? dayjs.unix(startBound).toISOString()
		: u.TimeSoup(startBound, NOW, peaks, deviation, mean);

	// anonymous and session ids
	if (CONFIG?.anonIds) record.device_id = chance.pickone(anonymousIds);
	if (CONFIG?.sessionIds) record.session_id = chance.pickone(sessionIds);

	//sometimes have a user_id
	if (!isFirstEvent && chance.bool({ likelihood: 42 })) record.user_id = distinct_id;

	// ensure that there is a user_id or device_id
	if (!record.user_id && !record.device_id) record.user_id = distinct_id;

	//iterate through custom properties (super props win over event props on key clashes)
	const customProps = { ...chosenEvent.properties, ...superProps };
	for (const key in customProps) {
		try {
			record[key] = u.choose(customProps[key]);
		} catch (e) {
			console.error(`error with ${key} in ${chosenEvent.event} event`, e);
			debugger;
		}
	}

	//iterate through default properties; they never overwrite a value that is already set
	for (const key in builtIns) {
		const pool = builtIns[key];
		if (!Array.isArray(pool)) continue;

		const choice = u.choose(pool);

		if (typeof choice === "string") {
			if (!record[key]) record[key] = choice;
			continue;
		}

		if (Array.isArray(choice)) {
			for (const subChoice of choice) {
				if (!record[key]) record[key] = subChoice;
			}
			continue;
		}

		if (typeof choice !== "object") continue;

		// object choices are flattened onto the record, one level of nesting deep
		for (const subKey in choice) {
			const nested = choice[subKey];
			if (typeof nested === "string") {
				if (!record[subKey]) record[subKey] = nested;
			}
			else if (Array.isArray(nested)) {
				const subChoice = u.choose(nested);
				if (!record[subKey]) record[subKey] = subChoice;
			}
			else if (typeof nested === "object") {
				for (const subSubKey in nested) {
					if (!record[subSubKey]) record[subSubKey] = nested[subSubKey];
				}
			}
		}
	}

	//iterate through groups
	for (const [groupKey, groupCardinality, groupEventNames] of groupKeys) {
		const groupEvents = groupEventNames || [];
		// empty array for group events means all events
		const appliesToThisEvent = !groupEvents.length || groupEvents.includes(record.event);
		if (appliesToThisEvent) record[groupKey] = u.pick(u.weighNumRange(1, groupCardinality));
	}

	//make $insert_id
	record.insert_id = md5(JSON.stringify(record));

	return record;
}
633
-
634
/**
 * from a funnel spec to a funnel that a user completes/doesn't complete
 * this is called MANY times per user
 *
 * Calls `config.hook` twice: with "funnel-pre" before anything is generated and
 * with "funnel-post" on the finished event list.
 *
 * @param {Funnel} funnel
 * @param {Person} user
 * @param {UserProfile} profile
 * @param {Record<string, SCDTableRow[]>} scd
 * @param {number} firstEventTime unix seconds of the user's first event; when falsy, `user.created` is used
 * @param {Config} config
 * @return {[EventSpec[], Boolean]} the generated events and whether the user converted
 */
function makeFunnel(funnel, user, profile, scd, firstEventTime, config) {
	const chance = u.getChance();
	const { hook } = config;
	hook(funnel, "funnel-pre", { user, profile, scd, funnel, config });
	let {
		sequence,
		conversionRate = 50,
		order = 'sequential',
		timeToConvert = 1,
		props,
		requireRepeats = false,
	} = funnel;
	const { distinct_id, created, anonymousIds, sessionIds } = user;
	const { superProps, groupKeys } = config;

	//choose the properties for this funnel (one value per funnel prop, shared by every step)
	const chosenFunnelProps = { ...props, ...superProps };
	for (const key in props) {
		try {
			chosenFunnelProps[key] = u.choose(chosenFunnelProps[key]);
		} catch (e) {
			console.error(`error with ${key} in ${funnel.sequence.join(" > ")} funnel`, e);
			debugger;
		}
	}

	const funnelPossibleEvents = sequence
		.map((eventName) => {
			const foundEvent = config.events.find((e) => e.event === eventName);
			/** @type {EventConfig} */
			const eventSpec = clone(foundEvent) || { event: eventName, properties: {} };
			for (const key in eventSpec.properties) {
				try {
					eventSpec.properties[key] = u.choose(eventSpec.properties[key]);
				} catch (e) {
					console.error(`error with ${key} in ${eventSpec.event} event`, e);
					debugger;
				}
			}
			delete eventSpec.isFirstEvent;
			delete eventSpec.weight;
			eventSpec.properties = { ...eventSpec.properties, ...chosenFunnelProps };
			return eventSpec;
		})
		.reduce((acc, step) => {
			const isRepeat = !requireRepeats && acc.find(e => e.event === step.event);
			if (!isRepeat) {
				acc.push(step);
				return acc;
			}
			// a repeated step is a coin flip: keep it (and convert more) or skip it (and convert less)
			if (chance.bool({ likelihood: 50 })) {
				conversionRate = Math.floor(conversionRate * 1.25); //increase conversion rate
				acc.push(step);
			}
			//A SKIPPED STEP!
			else {
				conversionRate = Math.floor(conversionRate * .75); //reduce conversion rate
			}
			return acc;
		}, []);

	// the 1.25x boosts above can push the rate past 100, which chance.bool rejects
	// with a RangeError; clamp only here so in-range values behave exactly as before
	const likelihood = Math.min(100, Math.max(0, conversionRate));
	const doesUserConvert = chance.bool({ likelihood });
	let numStepsUserWillTake = sequence.length;
	if (!doesUserConvert) numStepsUserWillTake = u.integer(1, sequence.length - 1);
	const msInHour = 60000 * 60;
	const funnelStepsUserWillTake = funnelPossibleEvents.slice(0, numStepsUserWillTake);

	let funnelActualOrder = [];

	switch (order) {
		case "sequential":
			funnelActualOrder = funnelStepsUserWillTake;
			break;
		case "random":
			funnelActualOrder = u.shuffleArray(funnelStepsUserWillTake);
			break;
		case "first-fixed":
			funnelActualOrder = u.shuffleExceptFirst(funnelStepsUserWillTake);
			break;
		case "last-fixed":
			funnelActualOrder = u.shuffleExceptLast(funnelStepsUserWillTake);
			break;
		case "first-and-last-fixed":
			funnelActualOrder = u.fixFirstAndLast(funnelStepsUserWillTake);
			break;
		case "middle-fixed":
			funnelActualOrder = u.shuffleOutside(funnelStepsUserWillTake);
			break;
		case "interrupted": {
			const potentialSubstitutes = config?.events
				?.filter(e => !e.isFirstEvent)
				?.filter(e => !sequence.includes(e.event)) || [];
			funnelActualOrder = u.interruptArray(funnelStepsUserWillTake, potentialSubstitutes);
			break;
		}
		default:
			funnelActualOrder = funnelStepsUserWillTake;
			break;
	}

	// Base spacing between steps: the conversion window split evenly across the steps taken
	const baseIncrement = (timeToConvert * msInHour) / numStepsUserWillTake;
	let lastTimeJump = 0;
	const funnelActualEventsWithOffset = funnelActualOrder
		.map((event, index) => {
			if (index === 0) {
				event.relativeTimeMs = 0;
				return event;
			}

			// Introduce a random fluctuation factor
			const fluctuation = u.integer(-baseIncrement / u.integer(3, 5), baseIncrement / u.integer(3, 5));

			// Ensure the time increments are increasing and add randomness
			const previousTime = lastTimeJump;
			const currentTime = previousTime + baseIncrement + fluctuation;

			// Assign the calculated time to the event
			const chosenTime = Math.max(currentTime, previousTime + 1); // Ensure non-decreasing time
			lastTimeJump = chosenTime;
			event.relativeTimeMs = chosenTime;
			return event;
		});

	const earliestTime = firstEventTime || dayjs(created).unix();
	let funnelStartTime;
	const finalEvents = funnelActualEventsWithOffset
		.map((event, index) => {
			const newEvent = makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, event, {}, groupKeys);
			if (index === 0) {
				// the first step anchors the funnel; later steps are offset from it
				funnelStartTime = dayjs(newEvent.time);
				delete newEvent.relativeTimeMs;
				return newEvent;
			}
			try {
				newEvent.time = dayjs(funnelStartTime).add(event.relativeTimeMs, "milliseconds").toISOString();
				delete newEvent.relativeTimeMs;
				return newEvent;
			}
			catch (e) {
				// report instead of failing silently; as before, this step yields `undefined`
				console.error(`error with time offset in ${funnel.sequence.join(" > ")} funnel`, e);
				debugger;
			}
		});

	hook(finalEvents, "funnel-post", { user, profile, scd, funnel, config });
	return [finalEvents, doesUserConvert];
}
805
-
806
-
807
/**
 * Builds a profile object: a shallow copy of `defaults` with one chosen value
 * per key of `props` layered on top.
 *
 * `anonymousIds` / `sessionIds` are stripped from the copy unless the matching
 * flag (`anonIds` / `sessionIds`) is truthy on the module-level CONFIG.
 * A property whose value cannot be chosen is skipped rather than failing the profile.
 *
 * @param {Object} [props] map of property name -> value spec handed to `u.choose`
 * @param {Object} [defaults] base fields to start from
 * @returns {Object} the assembled profile
 */
function makeProfile(props, defaults) {
	//build the spec
	const result = Object.assign({}, defaults);

	// anonymous and session ids
	const keepAnonIds = Boolean(CONFIG?.anonIds);
	const keepSessionIds = Boolean(CONFIG?.sessionIds);
	if (!keepAnonIds) delete result.anonymousIds;
	if (!keepSessionIds) delete result.sessionIds;

	for (const propName in props) {
		let chosen;
		try {
			chosen = u.choose(props[propName]);
		} catch (e) {
			// best effort: leave this property off the profile
			continue;
		}
		result[propName] = chosen;
	}

	return result;
}
827
-
828
/**
 * Builds the change history for one SCD property of one user.
 *
 * An empty spec short-circuits: `{}` comes back as `{}` and `[]` as `[]`.
 * Otherwise up to `mutations` rows are produced, each with a `startTime` and a
 * slightly later `insertTime`; generation stops early once the running
 * timestamp has moved past the current time.
 *
 * @param {import('../types').ValueValid} prop value spec for the property
 * @param {string} scdKey name of the property
 * @param {string} distinct_id
 * @param {number} mutations maximum number of rows to generate
 * @param {string} created the user's creation time; the first row starts here
 */
function makeSCD(prop, scdKey, distinct_id, mutations, created) {
	const serializedProp = JSON.stringify(prop);
	if (serializedProp === "{}") return {};
	if (serializedProp === "[]") return [];

	const rows = [];
	let cursor = dayjs(created);
	const deltaDays = dayjs().diff(cursor, "day");

	let generated = 0;
	while (generated < mutations) {
		if (cursor.isAfter(dayjs())) break;

		const row = makeProfile({ [scdKey]: prop }, { distinct_id });
		row.startTime = cursor.toISOString();

		// the row is "inserted" a little after it starts
		cursor = cursor.add(u.integer(1, 1000), "seconds");
		row.insertTime = cursor.toISOString();
		rows.push({ ...row });

		// jump ahead for the next change
		const afterDayJump = cursor.add(u.integer(0, deltaDays), "day");
		cursor = afterDayJump.subtract(u.integer(1, 1000), "seconds");

		generated += 1;
	}

	return rows;
}
856
-
857
//todo
/**
 * Builds one "$ad_spend" event per (network, campaign) pair for a given day,
 * skipping the "$organic" campaign. Networks come from the module-level CAMPAIGNS.
 *
 * @param {string} day used verbatim as the event `time` and formatted as YYYY-MM-DD for `date`
 * @returns {Object[]} the ad spend events for that day
 */
function makeAdSpend(day) {
	const chance = u.getChance();
	const spendRows = [];

	for (const network of CAMPAIGNS) {
		const source = network.utm_source[0];

		for (const campaign of network.utm_campaign) {
			if (campaign === "$organic") continue;

			// unused for now, but still drawn so the sequence of random values is unchanged
			const CAC = u.integer(42, 420); //todo: get the # of users created in this day from eventData

			// Randomly generating cost
			const cost = chance.floating({ min: 10, max: 250, fixed: 2 });

			// Ensuring realistic CPC and CTR
			const avgCPC = chance.floating({ min: 0.33, max: 2.00, fixed: 4 });
			const avgCTR = chance.floating({ min: 0.05, max: 0.25, fixed: 4 });

			// clicks come from cost / CPC; impressions from clicks / CTR; views from impressions * CTR
			const clicks = Math.floor(cost / avgCPC);
			const impressions = Math.floor(clicks / avgCTR);
			const views = Math.floor(impressions * avgCTR);

			//tags
			const utm_medium = u.choose(u.pickAWinner(network.utm_medium)());
			const utm_content = u.choose(u.pickAWinner(network.utm_content)());
			const utm_term = u.choose(u.pickAWinner(network.utm_term)());

			//each of these is a campaign
			spendRows.push({
				event: "$ad_spend",
				time: day,
				source: 'dm4',
				utm_campaign: campaign,
				campaign_id: md5(source + '-' + campaign),
				network: source.toUpperCase(),
				distinct_id: source.toUpperCase(),
				utm_source: source,
				utm_medium,
				utm_content,
				utm_term,

				clicks,
				views,
				impressions,
				cost,
				date: dayjs(day).format("YYYY-MM-DD"),
			});
		}
	}

	return spendRows;
}
910
-
911
-
912
-
913
-
914
-
915
-
916
-
917
// Entry point: when required as a module, export `main` (with the utils attached);
// when run directly, act as the CLI.
if (require.main !== module) {
	main.utils = { ...u };
	module.exports = main;
} else {
	isCLI = true;
	const args = getCliParams();
	// @ts-ignore
	let { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false, sessionIds, anonIds } = args;
	// @ts-ignore
	const suppliedConfig = args._[0];

	// pick a config: a user-supplied file wins, otherwise one of the bundled schemas
	let config = null;
	if (suppliedConfig) {
		console.log(`using ${suppliedConfig} for data\n`);
		config = require(path.resolve(suppliedConfig));
	}
	else if (complex) {
		console.log(`... using default COMPLEX configuration [everything] ...\n`);
		console.log(`... for more simple data, don't use the --complex flag ...\n`);
		console.log(`... or specify your own js config file (see docs or --help) ...\n`);
		config = require(path.resolve(__dirname, "../schemas/complex.js"));
	}
	else {
		console.log(`... using default SIMPLE configuration [events + users] ...\n`);
		console.log(`... for more complex data, use the --complex flag ...\n`);
		config = require(path.resolve(__dirname, "../schemas/simple.js"));
	}

	//override config with cli params
	if (token) config.token = token;
	if (seed) config.seed = seed;
	// a "csv" CLI format does not override a config that asks for json
	if (format === "csv" && config.format === "json") format = "json";
	if (format) config.format = format;
	if (numDays) config.numDays = numDays;
	if (numUsers) config.numUsers = numUsers;
	if (numEvents) config.numEvents = numEvents;
	if (region) config.region = region;
	if (writeToDisk) config.writeToDisk = writeToDisk;
	// the flag can arrive as the literal string 'false'
	if (writeToDisk === 'false') config.writeToDisk = false;
	if (sessionIds) config.sessionIds = sessionIds;
	if (anonIds) config.anonIds = anonIds;
	config.verbose = true;

	const printSummary = (data) => {
		log(`-----------------SUMMARY-----------------`);
		const emptyResult = { success: 0, bytes: 0 };
		const {
			events = emptyResult,
			groups = [emptyResult],
			users = emptyResult,
		} = data.importResults;
		const files = data.files;
		// the output folder is the last entry of the files list
		const folder = files?.pop();

		let groupBytes = 0;
		for (const group of groups) groupBytes = groupBytes + group.bytes;
		let groupSuccess = 0;
		for (const group of groups) groupSuccess = groupSuccess + group.success;

		const bytes = events.bytes + groupBytes + users.bytes;
		const stats = {
			events: comma(events.success || 0),
			users: comma(users.success || 0),
			groups: comma(groupSuccess || 0),
			bytes: bytesHuman(bytes || 0),
		};
		if (bytes > 0) console.table(stats);
		log(`\nfiles written to ${folder || "no where; we didn't write anything"} ...`);
		log(" " + files?.flat().join("\n "));
		log(`\n----------------SUMMARY-----------------\n\n\n`);
	};

	const printError = (e) => {
		log(`------------------ERROR------------------`);
		console.error(e);
		log(`------------------ERROR------------------`);
		debugger;
	};

	const sayGoodbye = () => {
		log("have a wonderful day :)");
		u.openFinder(path.resolve("./data"));
	};

	main(config)
		.then(printSummary)
		.catch(printError)
		.finally(sayGoodbye);
}
1001
-
1002
-
1003
/**
 * console.log wrapper that only prints when the module-level VERBOSE flag is set.
 *
 * Every occurrence of the current working directory inside a string argument is
 * replaced with "." so absolute paths print as relative ones. Non-string
 * arguments are passed through untouched.
 *
 * @param {...any} args values to print
 */
function log(...args) {
	if (!VERBOSE) return;

	const cwd = process.cwd(); // Get the current working directory

	const printable = args.map((arg) => {
		if (typeof arg !== 'string' || !cwd) return arg;
		// split/join does a literal replacement; building a RegExp from the path would
		// misread characters such as "+", "(", "." or "\" as regex syntax
		return arg.split(cwd).join(".");
	});

	console.log(...printable);
}