make-mp-data 1.3.4 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -6,25 +6,35 @@ by AK
6
6
  ak@mixpanel.com
7
7
  */
8
8
 
9
- const RUNTIME = process.env.RUNTIME || "unspecified";
10
- const mp = require("mixpanel-import");
11
- const path = require("path");
12
- const Chance = require("chance");
13
- const chance = new Chance();
14
- const { comma, bytesHuman, mkdir, makeName, md5, clone, tracker, uid } = require("ak-tools");
15
- const u = require("./utils.js");
16
- const AKsTimeSoup = require("./timesoup.js");
9
+ //todo: churn ... is churnFunnel, possible to return, etc
10
+ //todo: fixedTimeFunnel? if set this funnel will occur for all users at the same time ['cart charged', 'charge complete']
11
+ //todo defaults!!!
12
+ //todo ads-data
13
+
14
+
17
15
  const dayjs = require("dayjs");
18
16
  const utc = require("dayjs/plugin/utc");
19
17
  dayjs.extend(utc);
18
+ const NOW = dayjs('2024-02-02').unix(); //this is a FIXED POINT and we will shift it later
19
+ global.NOW = NOW;
20
+ const mp = require("mixpanel-import");
21
+ const path = require("path");
22
+ const { comma, bytesHuman, makeName, md5, clone, tracker, uid } = require("ak-tools");
23
+ const { generateLineChart } = require('./chart.js');
24
+ const { version } = require('./package.json');
25
+ const os = require("os");
26
+ const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41", os.userInfo().username);
27
+
28
+
29
+ const u = require("./utils.js");
20
30
  const cliParams = require("./cli.js");
21
- const NOW = dayjs().unix();
31
+
22
32
  let VERBOSE = false;
23
33
  let isCLI = false;
34
+ let CONFIG;
35
+ require('dotenv').config();
36
+
24
37
 
25
- const { version } = require('./package.json');
26
- const os = require("os");
27
- const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41", os.userInfo().username);
28
38
  function track(name, props, ...rest) {
29
39
  if (process.env.NODE_ENV === 'test') return;
30
40
  metrics(name, props, ...rest);
@@ -32,19 +42,33 @@ function track(name, props, ...rest) {
32
42
 
33
43
  /** @typedef {import('./types.d.ts').Config} Config */
34
44
  /** @typedef {import('./types.d.ts').EventConfig} EventConfig */
45
+ /** @typedef {import('./types.d.ts').Funnel} Funnel */
46
+ /** @typedef {import('./types.d.ts').Person} Person */
47
+ /** @typedef {import('./types.d.ts').SCDTableRow} SCDTableRow */
48
+ /** @typedef {import('./types.d.ts').UserProfile} UserProfile */
49
+ /** @typedef {import('./types.d.ts').EventSpec} EventSpec */
35
50
 
36
51
  /**
37
52
  * generates fake mixpanel data
38
53
  * @param {Config} config
39
54
  */
40
55
  async function main(config) {
56
+ //PARAMS
57
+ const seedWord = process.env.SEED || config.seed || "hello friend!";
58
+ config.seed = seedWord;
59
+ u.initChance(seedWord);
60
+ const chance = u.getChance();
61
+ config.chance = chance;
41
62
  let {
42
- seed = "every time a rug is micturated upon in this fair city...",
63
+ seed,
43
64
  numEvents = 100000,
44
65
  numUsers = 1000,
45
66
  numDays = 30,
67
+ epochStart = 0,
68
+ epochEnd = dayjs().unix(),
46
69
  events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }],
47
70
  superProps = { platform: ["web", "iOS", "Android"] },
71
+ funnels = [],
48
72
  userProps = {
49
73
  favoriteColor: ["red", "green", "blue", "yellow"],
50
74
  spiritAnimal: chance.animal.bind(chance),
@@ -61,131 +85,148 @@ async function main(config) {
61
85
  region = "US",
62
86
  writeToDisk = false,
63
87
  verbose = false,
88
+ makeChart = false,
89
+ soup = {},
64
90
  hook = (record) => record,
65
91
  } = config;
92
+ if (!config.superProps) config.superProps = superProps;
93
+ if (!config.userProps || Object.keys(config?.userProps)) config.userProps = userProps;
66
94
  VERBOSE = verbose;
67
95
  config.simulationName = makeName();
68
96
  const { simulationName } = config;
97
+ if (epochStart && !numDays) numDays = dayjs.unix(epochEnd).diff(dayjs.unix(epochStart), "day");
98
+ if (!epochStart && numDays) epochStart = dayjs.unix(epochEnd).subtract(numDays, "day").unix();
99
+ if (epochStart && numDays) { } //noop
100
+ if (!epochStart && !numDays) debugger; //never happens
101
+ config.seed = seed;
102
+ config.numEvents = numEvents;
103
+ config.numUsers = numUsers;
104
+ config.numDays = numDays;
105
+ config.epochStart = epochStart;
106
+ config.epochEnd = epochEnd;
107
+ config.events = events;
108
+ config.superProps = superProps;
109
+ config.funnels = funnels;
110
+ config.userProps = userProps;
111
+ config.scdProps = scdProps;
112
+ config.mirrorProps = mirrorProps;
113
+ config.groupKeys = groupKeys;
114
+ config.groupProps = groupProps;
115
+ config.lookupTables = lookupTables;
116
+ config.anonIds = anonIds;
117
+ config.sessionIds = sessionIds;
118
+ config.format = format;
119
+ config.token = token;
120
+ config.region = region;
121
+ config.writeToDisk = writeToDisk;
122
+ config.verbose = verbose;
123
+ config.makeChart = makeChart;
124
+ config.soup = soup;
125
+ config.hook = hook;
126
+
127
+ //event validation
128
+ const validatedEvents = u.validateEventConfig(events);
129
+ events = validatedEvents;
130
+ config.events = validatedEvents;
69
131
  global.MP_SIMULATION_CONFIG = config;
70
- const uuidChance = new Chance(seed);
132
+ CONFIG = config;
71
133
  const runId = uid(42);
72
- track('start simulation', {
73
- runId,
74
- seed,
75
- numEvents,
76
- numUsers,
77
- numDays,
78
- anonIds,
79
- sessionIds,
80
- format,
81
- targetToken: token,
82
- region,
83
- writeToDisk,
84
- isCLI,
85
- version
86
- });
134
+ let trackingParams = { runId, seed, numEvents, numUsers, numDays, anonIds, sessionIds, format, targetToken: token, region, writeToDisk, isCLI, version };
135
+ track('start simulation', trackingParams);
136
+
87
137
  log(`------------------SETUP------------------`);
88
138
  log(`\nyour data simulation will heretofore be known as: \n\n\t${simulationName.toUpperCase()}...\n`);
89
139
  log(`and your configuration is:\n\n`, JSON.stringify({ seed, numEvents, numUsers, numDays, format, token, region, writeToDisk, anonIds, sessionIds }, null, 2));
90
140
  log(`------------------SETUP------------------`, "\n");
91
141
 
92
-
93
- //the function which generates $distinct_id + $anonymous_ids, $session_ids, and $created, skewing towards the present
94
- function generateUser() {
95
- const distinct_id = uuidChance.guid();
96
- let z = u.boxMullerRandom();
97
- const skew = chance.normal({ mean: 10, dev: 3 });
98
- z = u.applySkew(z, skew);
99
-
100
- // Scale and shift the normally distributed value to fit the range of days
101
- const maxZ = u.integer(2, 4);
102
- const scaledZ = (z / maxZ + 1) / 2;
103
- const daysAgoBorn = Math.round(scaledZ * (numDays - 1)) + 1;
104
-
105
- return {
106
- distinct_id,
107
- ...u.person(daysAgoBorn),
108
- };
109
- }
110
-
111
- // weigh events for random selection
112
- const weightedEvents = events
113
- .reduce((acc, event) => {
114
- const weight = event.weight || 1;
115
- for (let i = 0; i < weight; i++) {
116
-
117
- // @ts-ignore
118
- acc.push(event);
119
- }
120
- return acc;
121
- }, [])
122
-
123
- // @ts-ignore
124
- .filter((e) => !e.isFirstEvent);
125
-
126
- const firstEvents = events.filter((e) => e.isFirstEvent);
127
- const eventData = enrichArray([], { hook, type: "event", config });
128
- const userProfilesData = enrichArray([], { hook, type: "user", config });
142
+ //setup all the data structures we will push into
143
+ const eventData = u.enrichArray([], { hook, type: "event", config });
144
+ const userProfilesData = u.enrichArray([], { hook, type: "user", config });
129
145
  const scdTableKeys = Object.keys(scdProps);
130
146
  const scdTableData = [];
131
147
  for (const [index, key] of scdTableKeys.entries()) {
132
- scdTableData[index] = enrichArray([], { hook, type: "scd", config, scdKey: key });
148
+ scdTableData[index] = u.enrichArray([], { hook, type: "scd", config, scdKey: key });
149
+ }
150
+ const groupProfilesData = u.enrichArray([], { hook, type: "group", config });
151
+ const lookupTableData = u.enrichArray([], { hook, type: "lookup", config });
152
+ const avgEvPerUser = Math.ceil(numEvents / numUsers);
153
+
154
+ // if no funnels, make some out of events...
155
+ if (!funnels || !funnels.length) {
156
+ funnels = inferFunnels(events);
157
+ config.funnels = funnels;
158
+ CONFIG = config;
133
159
  }
134
- // const scdTableData = enrichArray([], { hook, type: "scd", config });
135
- const groupProfilesData = enrichArray([], { hook, type: "groups", config });
136
- const lookupTableData = enrichArray([], { hook, type: "lookups", config });
137
- const avgEvPerUser = Math.floor(numEvents / numUsers);
138
160
 
139
161
  //user loop
140
162
  log(`---------------SIMULATION----------------`, "\n\n");
141
- for (let i = 1; i < numUsers + 1; i++) {
163
+ loopUsers: for (let i = 1; i < numUsers + 1; i++) {
142
164
  u.progress("users", i);
143
- const user = generateUser();
144
- const { distinct_id, $created, anonymousIds, sessionIds } = user;
145
- userProfilesData.hPush(makeProfile(userProps, user));
146
-
147
- //scd loop
165
+ const userId = chance.guid();
166
+ // const user = u.generateUser(userId, numDays, amp, freq, skew);
167
+ const user = u.generateUser(userId, numDays);
168
+ const { distinct_id, created, anonymousIds, sessionIds } = user;
169
+ let numEventsPreformed = 0;
170
+
171
+ // profile creation
172
+ const profile = makeProfile(userProps, user);
173
+ userProfilesData.hookPush(profile);
174
+
175
+ //scd creation
176
+ /** @type {Record<string, SCDTableRow[]>} */
177
+ // @ts-ignore
178
+ const userSCD = {};
148
179
  for (const [index, key] of scdTableKeys.entries()) {
149
180
  const mutations = chance.integer({ min: 1, max: 10 });
150
- scdTableData[index].hPush(makeSCD(scdProps[key], key, distinct_id, mutations, $created));
181
+ const changes = makeSCD(scdProps[key], key, distinct_id, mutations, created);
182
+ // @ts-ignore
183
+ userSCD[key] = changes;
184
+ scdTableData[index].hookPush(changes);
151
185
  }
152
186
 
153
- const numEventsThisUser = Math.round(
154
- chance.normal({ mean: avgEvPerUser, dev: avgEvPerUser / u.integer(3, 7) })
155
- );
156
-
157
- if (firstEvents.length) {
158
- eventData.hPush(
159
- makeEvent(
160
- distinct_id,
161
- anonymousIds,
162
- sessionIds,
163
- dayjs($created).unix(),
164
- firstEvents,
165
- superProps,
166
- groupKeys,
167
- true
168
- )
169
- );
187
+ let numEventsThisUserWillPreform = Math.floor(chance.normal({
188
+ mean: avgEvPerUser,
189
+ dev: avgEvPerUser / u.integer(u.integer(2, 5), u.integer(2, 7))
190
+ }) * 0.714159265359);
191
+
192
+ // power users do 5x more events
193
+ chance.bool({ likelihood: 20 }) ? numEventsThisUserWillPreform *= 5 : null;
194
+
195
+ // shitty users do 1/3 as many events
196
+ chance.bool({ likelihood: 15 }) ? numEventsThisUserWillPreform *= 0.333 : null;
197
+
198
+ numEventsThisUserWillPreform = Math.round(numEventsThisUserWillPreform);
199
+
200
+ let userFirstEventTime;
201
+
202
+ //first funnel
203
+ const firstFunnels = funnels.filter((f) => f.isFirstFunnel).reduce(u.weighFunnels, []);
204
+ const usageFunnels = funnels.filter((f) => !f.isFirstFunnel).reduce(u.weighFunnels, []);
205
+ const userIsBornInDataset = chance.bool({ likelihood: 30 });
206
+ if (firstFunnels.length && userIsBornInDataset) {
207
+ /** @type {Funnel} */
208
+ const firstFunnel = chance.pickone(firstFunnels, user);
209
+
210
+ const [data, userConverted] = makeFunnel(firstFunnel, user, profile, userSCD, null, config);
211
+ userFirstEventTime = dayjs(data[0].time).unix();
212
+ numEventsPreformed += data.length;
213
+ eventData.hookPush(data);
214
+ if (!userConverted) continue loopUsers;
170
215
  }
171
216
 
172
- //event loop
173
- for (let j = 0; j < numEventsThisUser; j++) {
174
- eventData.hPush(
175
- makeEvent(
176
- distinct_id,
177
- anonymousIds,
178
- sessionIds,
179
- dayjs($created).unix(),
180
- weightedEvents,
181
- superProps,
182
- groupKeys
183
- )
184
- );
217
+ while (numEventsPreformed < numEventsThisUserWillPreform) {
218
+ if (usageFunnels.length) {
219
+ /** @type {Funnel} */
220
+ const currentFunnel = chance.pickone(usageFunnels);
221
+ const [data, userConverted] = makeFunnel(currentFunnel, user, profile, userSCD, userFirstEventTime, config);
222
+ numEventsPreformed += data.length;
223
+ eventData.hookPush(data);
224
+ }
185
225
  }
226
+ // end individual user loop
186
227
  }
187
228
 
188
- //flatten SCD
229
+ //flatten SCD tables
189
230
  scdTableData.forEach((table, index) => scdTableData[index] = table.flat());
190
231
 
191
232
  log("\n");
@@ -199,12 +240,12 @@ async function main(config) {
199
240
  u.progress("groups", i);
200
241
  const group = {
201
242
  [groupKey]: i,
202
- ...makeProfile(groupProps[groupKey]),
203
- // $distinct_id: i,
243
+ ...makeProfile(groupProps[groupKey])
204
244
  };
245
+ group["distinct_id"] = i;
205
246
  groupProfiles.push(group);
206
247
  }
207
- groupProfilesData.hPush({ key: groupKey, data: groupProfiles });
248
+ groupProfilesData.hookPush({ key: groupKey, data: groupProfiles });
208
249
  }
209
250
  log("\n");
210
251
 
@@ -220,10 +261,42 @@ async function main(config) {
220
261
  };
221
262
  data.push(item);
222
263
  }
223
- lookupTableData.hPush({ key, data });
264
+ lookupTableData.hookPush({ key, data });
224
265
  }
225
266
 
226
- // deal with mirror props
267
+ // SHIFT TIME
268
+ const actualNow = dayjs();
269
+ const fixedNow = dayjs.unix(global.NOW);
270
+ const timeShift = actualNow.diff(fixedNow, "second");
271
+
272
+ eventData.forEach((event) => {
273
+ try {
274
+ const newTime = dayjs(event.time).add(timeShift, "second");
275
+ event.time = newTime.toISOString();
276
+ if (epochStart && newTime.unix() < epochStart) event = {};
277
+ if (epochEnd && newTime.unix() > epochEnd) event = {};
278
+ }
279
+ catch (e) {
280
+ //noop
281
+ }
282
+ });
283
+
284
+ // const dayShift = actualNow.diff(global.NOW, "day");
285
+ // userProfilesData.forEach((profile) => {
286
+ // const newTime = dayjs(profile.created).add(dayShift, "day");
287
+ // profile.created = newTime.toISOString();
288
+ // });
289
+
290
+
291
+ // draw charts
292
+ if (makeChart) {
293
+ const bornEvents = config.events?.filter((e) => e.isFirstEvent)?.map(e => e.event) || [];
294
+ const bornFunnels = config.funnels?.filter((f) => f.isFirstFunnel)?.map(f => f.sequence[0]) || [];
295
+ const bornBehaviors = [...bornEvents, ...bornFunnels];
296
+ const chart = await generateLineChart(eventData, bornBehaviors, makeChart);
297
+ }
298
+
299
+ // create mirrorProps
227
300
  let mirrorEventData = [];
228
301
  const mirrorPropKeys = Object.keys(mirrorProps);
229
302
  if (mirrorPropKeys.length) {
@@ -237,7 +310,7 @@ async function main(config) {
237
310
  }
238
311
 
239
312
  const { eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder } =
240
- buildFileNames(config);
313
+ u.buildFileNames(config);
241
314
  const pairs = [
242
315
  [eventFiles, [eventData]],
243
316
  [userFiles, [userProfilesData]],
@@ -250,20 +323,7 @@ async function main(config) {
250
323
  log(`---------------SIMULATION----------------`, "\n");
251
324
 
252
325
  if (!writeToDisk && !token) {
253
- track('end simulation', {
254
- runId,
255
- seed,
256
- numEvents,
257
- numUsers,
258
- numDays,
259
- anonIds,
260
- sessionIds,
261
- format,
262
- token,
263
- region,
264
- writeToDisk,
265
- isCLI
266
- });
326
+ track('end simulation', trackingParams);
267
327
  return {
268
328
  eventData,
269
329
  userProfilesData,
@@ -271,7 +331,7 @@ async function main(config) {
271
331
  groupProfilesData,
272
332
  lookupTableData,
273
333
  mirrorEventData,
274
- import: {},
334
+ importResults: {},
275
335
  files: []
276
336
  };
277
337
  }
@@ -315,30 +375,28 @@ async function main(config) {
315
375
  const creds = { token };
316
376
  /** @type {import('mixpanel-import').Options} */
317
377
  const commonOpts = {
318
-
319
378
  region,
320
379
  fixData: true,
321
380
  verbose: false,
322
381
  forceStream: true,
323
- strict: false,
382
+ strict: true,
324
383
  dryRun: false,
325
384
  abridged: false,
385
+ fixJson: true,
386
+ showProgress: true
326
387
  };
327
388
 
328
389
  if (eventData) {
329
- log(`importing events to mixpanel...`);
390
+ log(`importing events to mixpanel...\n`);
330
391
  const imported = await mp(creds, eventData, {
331
392
  recordType: "event",
332
- fixData: true,
333
- fixJson: true,
334
- strict: false,
335
393
  ...commonOpts,
336
394
  });
337
395
  log(`\tsent ${comma(imported.success)} events\n`);
338
396
  importResults.events = imported;
339
397
  }
340
398
  if (userProfilesData) {
341
- log(`importing user profiles to mixpanel...`);
399
+ log(`importing user profiles to mixpanel...\n`);
342
400
  const imported = await mp(creds, userProfilesData, {
343
401
  recordType: "user",
344
402
  ...commonOpts,
@@ -350,92 +408,37 @@ async function main(config) {
350
408
  for (const groupProfiles of groupProfilesData) {
351
409
  const groupKey = groupProfiles.key;
352
410
  const data = groupProfiles.data;
353
- log(`importing ${groupKey} profiles to mixpanel...`);
411
+ log(`importing ${groupKey} profiles to mixpanel...\n`);
354
412
  const imported = await mp({ token, groupKey }, data, {
355
413
  recordType: "group",
356
414
  ...commonOpts,
415
+
357
416
  });
358
417
  log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);
359
418
 
360
419
  importResults.groups.push(imported);
361
420
  }
362
421
  }
363
-
364
422
  }
365
423
  log(`\n-----------------WRITES------------------`, "\n");
366
- track('end simulation', {
367
- runId,
368
- seed,
369
- numEvents,
370
- numUsers,
371
- numDays,
372
- events,
373
- anonIds,
374
- sessionIds,
375
- format,
376
- targetToken: token,
377
- region,
378
- writeToDisk,
379
- isCLI,
380
- version
381
- });
424
+ track('end simulation', trackingParams);
425
+
382
426
  return {
383
- import: importResults,
427
+ importResults,
384
428
  files: [eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder],
429
+ eventData,
430
+ userProfilesData,
431
+ scdTableData,
432
+ groupProfilesData,
433
+ lookupTableData,
434
+ mirrorEventData,
385
435
  };
386
436
  }
387
437
 
388
438
 
389
439
 
390
440
 
391
- function makeProfile(props, defaults) {
392
- //build the spec
393
- const profile = {
394
- ...defaults,
395
- };
396
-
397
- // anonymous and session ids
398
- if (!global.MP_SIMULATION_CONFIG?.anonIds) delete profile.anonymousIds;
399
- if (!global.MP_SIMULATION_CONFIG?.sessionIds) delete profile.sessionIds;
400
-
401
- for (const key in props) {
402
- try {
403
- profile[key] = u.choose(props[key]);
404
- } catch (e) {
405
- // debugger;
406
- }
407
- }
408
-
409
- return profile;
410
- }
411
- /**
412
- * @param {import('./types.d.ts').ValueValid} prop
413
- * @param {string} scdKey
414
- * @param {string} distinct_id
415
- * @param {number} mutations
416
- * @param {string} $created
417
- */
418
- function makeSCD(prop, scdKey, distinct_id, mutations, $created) {
419
- if (JSON.stringify(prop) === "{}") return {};
420
- if (JSON.stringify(prop) === "[]") return [];
421
- const scdEntries = [];
422
- let lastInserted = dayjs($created);
423
- const deltaDays = dayjs().diff(lastInserted, "day");
424
-
425
- for (let i = 0; i < mutations; i++) {
426
- if (lastInserted.isAfter(dayjs())) break;
427
- const scd = makeProfile({ [scdKey]: prop }, { distinct_id });
428
- scd.startTime = lastInserted.toISOString();
429
- lastInserted = lastInserted.add(u.integer(1, 1000), "seconds");
430
- scd.insertTime = lastInserted.toISOString();
431
- scdEntries.push({ ...scd });
432
- lastInserted = lastInserted
433
- .add(u.integer(0, deltaDays), "day")
434
- .subtract(u.integer(1, 1000), "seconds");
435
- }
436
441
 
437
- return scdEntries;
438
- }
439
442
 
440
443
  /**
441
444
  * creates a random event
@@ -443,45 +446,43 @@ function makeSCD(prop, scdKey, distinct_id, mutations, $created) {
443
446
  * @param {string[]} anonymousIds
444
447
  * @param {string[]} sessionIds
445
448
  * @param {number} earliestTime
446
- * @param {Object[]} events
449
+ * @param {EventConfig} chosenEvent
447
450
  * @param {Object} superProps
448
451
  * @param {Object} groupKeys
449
452
  * @param {Boolean} isFirstEvent=false
450
453
  */
451
- function makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, events, superProps, groupKeys, isFirstEvent = false) {
452
- let chosenEvent = chance.pickone(events);
453
-
454
- //allow for a string shorthand
455
- if (typeof chosenEvent === "string") {
456
- chosenEvent = { event: chosenEvent, properties: {} };
457
- }
458
-
454
+ function makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, chosenEvent, superProps, groupKeys, isFirstEvent = false) {
455
+ const { mean = 0, dev = 2, peaks = 5 } = CONFIG.soup;
459
456
  //event model
460
- const event = {
457
+ const eventTemplate = {
461
458
  event: chosenEvent.event,
462
- $source: "AKsTimeSoup",
459
+ source: "dm4",
463
460
  };
464
461
 
465
462
  //event time
466
- if (isFirstEvent) event.time = dayjs.unix(earliestTime).toISOString();
467
- if (!isFirstEvent) event.time = AKsTimeSoup(earliestTime, NOW);
463
+ if (earliestTime > NOW) {
464
+ earliestTime = dayjs.unix(NOW).subtract(2, 'd').unix();
465
+ };
466
+
467
+ if (isFirstEvent) eventTemplate.time = dayjs.unix(earliestTime).toISOString();
468
+ if (!isFirstEvent) eventTemplate.time = u.TimeSoup(earliestTime, NOW, peaks, dev, mean);
468
469
 
469
470
  // anonymous and session ids
470
- if (global.MP_SIMULATION_CONFIG?.anonIds) event.$device_id = chance.pickone(anonymousIds);
471
- if (global.MP_SIMULATION_CONFIG?.sessionIds) event.$session_id = chance.pickone(sessionIds);
471
+ if (CONFIG?.anonIds) eventTemplate.device_id = CONFIG.chance.pickone(anonymousIds);
472
+ if (CONFIG?.sessionIds) eventTemplate.session_id = CONFIG.chance.pickone(sessionIds);
472
473
 
473
- //sometimes have a $user_id
474
- if (!isFirstEvent && chance.bool({ likelihood: 42 })) event.$user_id = distinct_id;
474
+ //sometimes have a user_id
475
+ if (!isFirstEvent && CONFIG.chance.bool({ likelihood: 42 })) eventTemplate.user_id = distinct_id;
475
476
 
476
- // ensure that there is a $user_id or $device_id
477
- if (!event.$user_id && !event.$device_id) event.$user_id = distinct_id;
477
+ // ensure that there is a user_id or device_id
478
+ if (!eventTemplate.user_id && !eventTemplate.device_id) eventTemplate.user_id = distinct_id;
478
479
 
479
480
  const props = { ...chosenEvent.properties, ...superProps };
480
481
 
481
482
  //iterate through custom properties
482
483
  for (const key in props) {
483
484
  try {
484
- event[key] = u.choose(props[key]);
485
+ eventTemplate[key] = u.choose(props[key]);
485
486
  } catch (e) {
486
487
  console.error(`error with ${key} in ${chosenEvent.event} event`, e);
487
488
  debugger;
@@ -495,99 +496,286 @@ function makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, events,
495
496
  const groupEvents = groupPair[2] || [];
496
497
 
497
498
  // empty array for group events means all events
498
- if (!groupEvents.length) event[groupKey] = u.pick(u.weightedRange(1, groupCardinality));
499
- if (groupEvents.includes(event.event)) event[groupKey] = u.pick(u.weightedRange(1, groupCardinality));
499
+ if (!groupEvents.length) eventTemplate[groupKey] = u.pick(u.weightedRange(1, groupCardinality));
500
+ if (groupEvents.includes(eventTemplate.event)) eventTemplate[groupKey] = u.pick(u.weightedRange(1, groupCardinality));
500
501
  }
501
502
 
502
503
  //make $insert_id
503
- event.$insert_id = md5(JSON.stringify(event));
504
+ eventTemplate.insert_id = md5(JSON.stringify(eventTemplate));
504
505
 
505
- return event;
506
+ return eventTemplate;
506
507
  }
507
508
 
508
- function buildFileNames(config) {
509
- const { format = "csv", groupKeys = [], lookupTables = [], m } = config;
510
- let extension = "";
511
- extension = format === "csv" ? "csv" : "json";
512
- // const current = dayjs.utc().format("MM-DD-HH");
513
- const simName = config.simulationName;
514
- let writeDir = "./";
515
- if (config.writeToDisk) writeDir = mkdir("./data");
516
- if (typeof writeDir !== "string") throw new Error("writeDir must be a string");
517
- if (typeof simName !== "string") throw new Error("simName must be a string");
518
-
519
- const writePaths = {
520
- eventFiles: [path.join(writeDir, `${simName}-EVENTS.${extension}`)],
521
- userFiles: [path.join(writeDir, `${simName}-USERS.${extension}`)],
522
- scdFiles: [],
523
- mirrorFiles: [],
524
- groupFiles: [],
525
- lookupFiles: [],
526
- folder: writeDir,
527
- };
509
+ /**
510
+ * from a funnel spec to a funnel that a user completes/doesn't complete
511
+ * this is called MANY times per user
512
+ * @param {Funnel} funnel
513
+ * @param {Person} user
514
+ * @param {UserProfile} profile
515
+ * @param {Record<string, SCDTableRow[]>} scd
516
+ * @param {number} firstEventTime
517
+ * @param {Config} config
518
+ * @return {[EventSpec[], Boolean]}
519
+ */
520
+ function makeFunnel(funnel, user, profile, scd, firstEventTime, config) {
521
+ const { hook } = config;
522
+ hook(funnel, "funnel-pre", { user, profile, scd, funnel, config });
523
+ let {
524
+ sequence,
525
+ conversionRate = 50,
526
+ order = 'sequential',
527
+ timeToConvert = 1,
528
+ props,
529
+ requireRepeats = false,
530
+ } = funnel;
531
+ const { distinct_id, created, anonymousIds, sessionIds } = user;
532
+ const { superProps, groupKeys } = config;
533
+ const { name, email } = profile;
534
+
535
+ const chosenFunnelProps = { ...props, ...superProps };
536
+ for (const key in props) {
537
+ try {
538
+ chosenFunnelProps[key] = u.choose(chosenFunnelProps[key]);
539
+ } catch (e) {
540
+ console.error(`error with ${key} in ${funnel.sequence.join(" > ")} funnel`, e);
541
+ debugger;
542
+ }
543
+ }
528
544
 
529
- //add SCD files
530
- const scdKeys = Object.keys(config?.scdProps || {});
531
- for (const key of scdKeys) {
532
- writePaths.scdFiles.push(
533
- path.join(writeDir, `${simName}-${key}-SCD.${extension}`)
534
- );
545
+ const funnelPossibleEvents = sequence
546
+ .map((eventName) => {
547
+ const foundEvent = config.events.find((e) => e.event === eventName);
548
+ /** @type {EventConfig} */
549
+ const eventSpec = foundEvent || { event: eventName, properties: {} };
550
+ for (const key in eventSpec.properties) {
551
+ try {
552
+ eventSpec.properties[key] = u.choose(eventSpec.properties[key]);
553
+ } catch (e) {
554
+ console.error(`error with ${key} in ${eventSpec.event} event`, e);
555
+ debugger;
556
+ }
557
+ }
558
+ delete eventSpec.isFirstEvent;
559
+ delete eventSpec.weight;
560
+ eventSpec.properties = { ...eventSpec.properties, ...chosenFunnelProps };
561
+ return eventSpec;
562
+ })
563
+ .reduce((acc, step) => {
564
+ if (!requireRepeats) {
565
+ if (acc.find(e => e.event === step.event)) {
566
+ if (config.chance.bool({ likelihood: 50 })) {
567
+ conversionRate = Math.floor(conversionRate * 1.25); //increase conversion rate
568
+ acc.push(step);
569
+ }
570
+ //A SKIPPED STEP!
571
+ else {
572
+ conversionRate = Math.floor(conversionRate * .75); //reduce conversion rate
573
+ return acc; //early return to skip the step
574
+ }
575
+ }
576
+ else {
577
+ acc.push(step);
578
+ }
579
+ }
580
+ else {
581
+ acc.push(step);
582
+ }
583
+ return acc;
584
+ }, []);
585
+
586
+ let doesUserConvert = config.chance.bool({ likelihood: conversionRate });
587
+ let numStepsUserWillTake = sequence.length;
588
+ if (!doesUserConvert) numStepsUserWillTake = u.integer(1, sequence.length - 1);
589
+ const funnelTotalRelativeTimeInHours = timeToConvert / numStepsUserWillTake;
590
+ const msInHour = 60000 * 60;
591
+ const funnelStepsUserWillTake = funnelPossibleEvents.slice(0, numStepsUserWillTake);
592
+
593
+ let funnelActualOrder = [];
594
+
595
+ switch (order) {
596
+ case "sequential":
597
+ funnelActualOrder = funnelStepsUserWillTake;
598
+ break;
599
+ case "random":
600
+ funnelActualOrder = u.shuffleArray(funnelStepsUserWillTake);
601
+ break;
602
+ case "first-fixed":
603
+ funnelActualOrder = u.shuffleExceptFirst(funnelStepsUserWillTake);
604
+ break;
605
+ case "last-fixed":
606
+ funnelActualOrder = u.shuffleExceptLast(funnelStepsUserWillTake);
607
+ break;
608
+ case "first-and-last-fixed":
609
+ funnelActualOrder = u.fixFirstAndLast(funnelStepsUserWillTake);
610
+ break;
611
+ case "middle-fixed":
612
+ funnelActualOrder = u.shuffleOutside(funnelStepsUserWillTake);
613
+ break;
614
+ case "interrupted":
615
+ const potentialSubstitutes = config?.events
616
+ ?.filter(e => !e.isFirstEvent)
617
+ ?.filter(e => !sequence.includes(e.event)) || [];
618
+ funnelActualOrder = u.interruptArray(funnelStepsUserWillTake, potentialSubstitutes);
619
+ break;
620
+ default:
621
+ funnelActualOrder = funnelStepsUserWillTake;
622
+ break;
535
623
  }
536
624
 
537
- //add group files
538
- for (const groupPair of groupKeys) {
539
- const groupKey = groupPair[0];
540
625
 
541
- writePaths.groupFiles.push(
542
- path.join(writeDir, `${simName}-${groupKey}-GROUP.${extension}`)
543
- );
544
- }
545
626
 
546
- //add lookup files
547
- for (const lookupTable of lookupTables) {
548
- const { key } = lookupTable;
549
- writePaths.lookupFiles.push(
550
- //lookups are always CSVs
551
- path.join(writeDir, `${simName}-${key}-LOOKUP.csv`)
552
- );
627
+ let lastTimeJump = 0;
628
+ const funnelActualEventsWithOffset = funnelActualOrder
629
+ .map((event, index) => {
630
+ if (index === 0) {
631
+ event.relativeTimeMs = 0;
632
+ return event;
633
+ }
634
+
635
+ // Calculate base increment for each step
636
+ const baseIncrement = (timeToConvert * msInHour) / numStepsUserWillTake;
637
+
638
+ // Introduce a random fluctuation factor
639
+ const fluctuation = u.integer(-baseIncrement / u.integer(3, 5), baseIncrement / u.integer(3, 5));
640
+
641
+ // Ensure the time increments are increasing and add randomness
642
+ const previousTime = lastTimeJump;
643
+ const currentTime = previousTime + baseIncrement + fluctuation;
644
+
645
+ // Assign the calculated time to the event
646
+ const chosenTime = Math.max(currentTime, previousTime + 1); // Ensure non-decreasing time
647
+ lastTimeJump = chosenTime;
648
+ event.relativeTimeMs = chosenTime;
649
+ return event;
650
+ });
651
+
652
+
653
+ const earliestTime = firstEventTime || dayjs(created).unix();
654
+ let funnelStartTime;
655
+ let finalEvents = funnelActualEventsWithOffset
656
+ .map((event, index) => {
657
+ const newEvent = makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, event, {}, groupKeys);
658
+ if (index === 0) {
659
+ funnelStartTime = dayjs(newEvent.time);
660
+ delete newEvent.relativeTimeMs;
661
+ return newEvent;
662
+ }
663
+ try {
664
+ newEvent.time = dayjs(funnelStartTime).add(event.relativeTimeMs, "milliseconds").toISOString();
665
+ delete newEvent.relativeTimeMs;
666
+ return newEvent;
667
+ }
668
+ catch (e) {
669
+
670
+ debugger;
671
+ }
672
+ });
673
+
674
+
675
+ hook(finalEvents, "funnel-post", { user, profile, scd, funnel, config });
676
+ return [finalEvents, doesUserConvert];
677
+ }
678
+
679
+
680
+ function inferFunnels(events) {
681
+ const createdFunnels = [];
682
+ const firstEvents = events.filter((e) => e.isFirstEvent).map((e) => e.event);
683
+ const usageEvents = events.filter((e) => !e.isFirstEvent).map((e) => e.event);
684
+ const numFunnelsToCreate = Math.ceil(usageEvents.length);
685
+ /** @type {Funnel} */
686
+ const funnelTemplate = {
687
+ sequence: [],
688
+ conversionRate: 50,
689
+ order: 'sequential',
690
+ requireRepeats: false,
691
+ props: {},
692
+ timeToConvert: 1,
693
+ isFirstFunnel: false,
694
+ weight: 1
695
+ };
696
+ if (firstEvents.length) {
697
+ for (const event of firstEvents) {
698
+ createdFunnels.push({ ...clone(funnelTemplate), sequence: [event], isFirstFunnel: true, conversionRate: 100 });
699
+ }
553
700
  }
554
701
 
555
- //add mirror files
556
- const mirrorProps = config?.mirrorProps || {};
557
- if (Object.keys(mirrorProps).length) {
558
- writePaths.mirrorFiles.push(
559
- path.join(writeDir, `${simName}-MIRROR.${extension}`)
560
- );
702
+ //at least one funnel with all usage events
703
+ createdFunnels.push({ ...clone(funnelTemplate), sequence: usageEvents });
704
+
705
+ //for the rest, make random funnels
706
+ followUpFunnels: for (let i = 1; i < numFunnelsToCreate; i++) {
707
+ /** @type {Funnel} */
708
+ const funnel = { ...clone(funnelTemplate) };
709
+ funnel.conversionRate = u.integer(25, 75);
710
+ funnel.timeToConvert = u.integer(1, 10);
711
+ funnel.weight = u.integer(1, 10);
712
+ const sequence = u.shuffleArray(usageEvents).slice(0, u.integer(2, usageEvents.length));
713
+ funnel.sequence = sequence;
714
+ funnel.order = 'random';
715
+ createdFunnels.push(funnel);
561
716
  }
562
717
 
563
- return writePaths;
718
+ return createdFunnels;
719
+
564
720
  }
565
721
 
566
- /** @typedef {import('./types').EnrichedArray} EnrichArray */
567
- /** @typedef {import('./types').EnrichArrayOptions} EnrichArrayOptions */
568
722
 
569
- /**
570
- * @param {any[]} arr
571
- * @param {EnrichArrayOptions} opts
572
- * @returns {EnrichArray}}
723
+ function makeProfile(props, defaults) {
724
+ //build the spec
725
+ const profile = {
726
+ ...defaults,
727
+ };
728
+
729
+ // anonymous and session ids
730
+ if (!CONFIG?.anonIds) delete profile.anonymousIds;
731
+ if (!CONFIG?.sessionIds) delete profile.sessionIds;
732
+
733
+ for (const key in props) {
734
+ try {
735
+ profile[key] = u.choose(props[key]);
736
+ } catch (e) {
737
+ // debugger;
738
+ }
739
+ }
740
+
741
+ return profile;
742
+ }
743
+
744
+ /**
745
+ * @param {import('./types.d.ts').ValueValid} prop
746
+ * @param {string} scdKey
747
+ * @param {string} distinct_id
748
+ * @param {number} mutations
749
+ * @param {string} created
573
750
  */
574
- function enrichArray(arr = [], opts = {}) {
575
- const { hook = a => a, type = "", ...rest } = opts;
751
+ function makeSCD(prop, scdKey, distinct_id, mutations, created) {
752
+ if (JSON.stringify(prop) === "{}") return {};
753
+ if (JSON.stringify(prop) === "[]") return [];
754
+ const scdEntries = [];
755
+ let lastInserted = dayjs(created);
756
+ const deltaDays = dayjs().diff(lastInserted, "day");
576
757
 
577
- function transformThenPush(item) {
578
- return arr.push(hook(item, type, rest));
758
+ for (let i = 0; i < mutations; i++) {
759
+ if (lastInserted.isAfter(dayjs())) break;
760
+ const scd = makeProfile({ [scdKey]: prop }, { distinct_id });
761
+ scd.startTime = lastInserted.toISOString();
762
+ lastInserted = lastInserted.add(u.integer(1, 1000), "seconds");
763
+ scd.insertTime = lastInserted.toISOString();
764
+ scdEntries.push({ ...scd });
765
+ lastInserted = lastInserted
766
+ .add(u.integer(0, deltaDays), "day")
767
+ .subtract(u.integer(1, 1000), "seconds");
579
768
  }
580
769
 
581
- /** @type {EnrichArray} */
582
- // @ts-ignore
583
- const enrichedArray = arr;
770
+ return scdEntries;
771
+ }
772
+
773
+
774
+
775
+
584
776
 
585
777
 
586
- enrichedArray.hPush = transformThenPush;
587
-
588
778
 
589
- return enrichedArray;
590
- };
591
779
 
592
780
 
593
781
 
@@ -596,12 +784,11 @@ if (require.main === module) {
596
784
  isCLI = true;
597
785
  const args = cliParams();
598
786
  // @ts-ignore
599
- const { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false, sessionIds, anonIds } = args;
787
+ let { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false, sessionIds, anonIds } = args;
600
788
  // @ts-ignore
601
789
  const suppliedConfig = args._[0];
602
790
 
603
- //if the user specifics an separate config file
604
- //todo this text isn't displaying
791
+ //if the user specifies a separate config file
605
792
  let config = null;
606
793
  if (suppliedConfig) {
607
794
  console.log(`using ${suppliedConfig} for data\n`);
@@ -612,18 +799,19 @@ if (require.main === module) {
612
799
  console.log(`... using default COMPLEX configuration [everything] ...\n`);
613
800
  console.log(`... for more simple data, don't use the --complex flag ...\n`);
614
801
  console.log(`... or specify your own js config file (see docs or --help) ...\n`);
615
- config = require(path.resolve(__dirname, "./models/complex.js"));
802
+ config = require(path.resolve(__dirname, "./schemas/complex.js"));
616
803
  }
617
804
  else {
618
805
  console.log(`... using default SIMPLE configuration [events + users] ...\n`);
619
806
  console.log(`... for more complex data, use the --complex flag ...\n`);
620
- config = require(path.resolve(__dirname, "./models/simple.js"));
807
+ config = require(path.resolve(__dirname, "./schemas/simple.js"));
621
808
  }
622
809
  }
623
810
 
624
811
  //override config with cli params
625
812
  if (token) config.token = token;
626
813
  if (seed) config.seed = seed;
814
+ if (format === "csv" && config.format === "json") format = "json";
627
815
  if (format) config.format = format;
628
816
  if (numDays) config.numDays = numDays;
629
817
  if (numUsers) config.numUsers = numUsers;
@@ -640,7 +828,7 @@ if (require.main === module) {
640
828
  log(`-----------------SUMMARY-----------------`);
641
829
  const d = { success: 0, bytes: 0 };
642
830
  const darr = [d];
643
- const { events = d, groups = darr, users = d } = data.import;
831
+ const { events = d, groups = darr, users = d } = data.importResults;
644
832
  const files = data.files;
645
833
  const folder = files?.pop();
646
834
  const groupBytes = groups.reduce((acc, group) => {
@@ -673,7 +861,6 @@ if (require.main === module) {
673
861
  });
674
862
  } else {
675
863
  main.utils = { ...u };
676
- main.timeSoup = AKsTimeSoup;
677
864
  module.exports = main;
678
865
  }
679
866