make-mp-data 1.3.3 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -6,27 +6,29 @@ by AK
6
6
  ak@mixpanel.com
7
7
  */
8
8
 
9
- const RUNTIME = process.env.RUNTIME || "unspecified";
10
- const mp = require("mixpanel-import");
11
- const path = require("path");
12
- const Chance = require("chance");
13
- const chance = new Chance();
14
- const { touch, comma, bytesHuman, mkdir } = require("ak-tools");
15
- const Papa = require("papaparse");
16
- const u = require("./utils.js");
17
- const AKsTimeSoup = require("./timesoup.js");
18
9
  const dayjs = require("dayjs");
19
10
  const utc = require("dayjs/plugin/utc");
20
11
  dayjs.extend(utc);
12
+ const NOW = dayjs('2024-02-02').unix(); //this is a FIXED POINT and we will shift it later
13
+ global.NOW = NOW;
14
+ const mp = require("mixpanel-import");
15
+ const path = require("path");
16
+ const { comma, bytesHuman, makeName, md5, clone, tracker, uid } = require("ak-tools");
17
+ const { generateLineChart } = require('./chart.js');
18
+ const { version } = require('./package.json');
19
+ const os = require("os");
20
+ const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41", os.userInfo().username);
21
+
22
+
23
+ const u = require("./utils.js");
21
24
  const cliParams = require("./cli.js");
22
- const { makeName, md5, clone, tracker, uid } = require('ak-tools');
23
- const NOW = dayjs().unix();
25
+
24
26
  let VERBOSE = false;
25
27
  let isCLI = false;
28
+ let CONFIG;
29
+ require('dotenv').config();
30
+
26
31
 
27
- const { version } = require('./package.json');
28
- const os = require("os");
29
- const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41", os.userInfo().username);
30
32
  function track(name, props, ...rest) {
31
33
  if (process.env.NODE_ENV === 'test') return;
32
34
  metrics(name, props, ...rest);
@@ -34,19 +36,33 @@ function track(name, props, ...rest) {
34
36
 
35
37
  /** @typedef {import('./types.d.ts').Config} Config */
36
38
  /** @typedef {import('./types.d.ts').EventConfig} EventConfig */
39
+ /** @typedef {import('./types.d.ts').Funnel} Funnel */
40
+ /** @typedef {import('./types.d.ts').Person} Person */
41
+ /** @typedef {import('./types.d.ts').SCDTableRow} SCDTableRow */
42
+ /** @typedef {import('./types.d.ts').UserProfile} UserProfile */
43
+ /** @typedef {import('./types.d.ts').EventSpec} EventSpec */
37
44
 
38
45
  /**
39
46
  * generates fake mixpanel data
40
47
  * @param {Config} config
41
48
  */
42
49
  async function main(config) {
50
+ //PARAMS
51
+ const seedWord = process.env.SEED || config.seed || "hello friend!";
52
+ config.seed = seedWord;
53
+ u.initChance(seedWord);
54
+ const chance = u.getChance();
55
+ config.chance = chance;
43
56
  let {
44
- seed = "every time a rug is micturated upon in this fair city...",
57
+ seed,
45
58
  numEvents = 100000,
46
59
  numUsers = 1000,
47
60
  numDays = 30,
61
+ epochStart = 0,
62
+ epochEnd = dayjs().unix(),
48
63
  events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }],
49
64
  superProps = { platform: ["web", "iOS", "Android"] },
65
+ funnels = [],
50
66
  userProps = {
51
67
  favoriteColor: ["red", "green", "blue", "yellow"],
52
68
  spiritAnimal: chance.animal.bind(chance),
@@ -63,129 +79,185 @@ async function main(config) {
63
79
  region = "US",
64
80
  writeToDisk = false,
65
81
  verbose = false,
82
+ makeChart = false,
83
+ soup = {},
66
84
  hook = (record) => record,
67
85
  } = config;
86
+ if (!config.superProps) config.superProps = superProps;
87
+ if (!config.userProps || Object.keys(config?.userProps)) config.userProps = userProps;
68
88
  VERBOSE = verbose;
69
89
  config.simulationName = makeName();
70
90
  const { simulationName } = config;
91
+ if (epochStart && !numDays) numDays = dayjs.unix(epochEnd).diff(dayjs.unix(epochStart), "day");
92
+ if (!epochStart && numDays) epochStart = dayjs.unix(epochEnd).subtract(numDays, "day").unix();
93
+ if (epochStart && numDays) { } //noop
94
+ if (!epochStart && !numDays) debugger; //never happens
95
+ config.seed = seed;
96
+ config.numEvents = numEvents;
97
+ config.numUsers = numUsers;
98
+ config.numDays = numDays;
99
+ config.epochStart = epochStart;
100
+ config.epochEnd = epochEnd;
101
+ config.events = events;
102
+ config.superProps = superProps;
103
+ config.funnels = funnels;
104
+ config.userProps = userProps;
105
+ config.scdProps = scdProps;
106
+ config.mirrorProps = mirrorProps;
107
+ config.groupKeys = groupKeys;
108
+ config.groupProps = groupProps;
109
+ config.lookupTables = lookupTables;
110
+ config.anonIds = anonIds;
111
+ config.sessionIds = sessionIds;
112
+ config.format = format;
113
+ config.token = token;
114
+ config.region = region;
115
+ config.writeToDisk = writeToDisk;
116
+ config.verbose = verbose;
117
+ config.makeChart = makeChart;
118
+ config.soup = soup;
119
+ config.hook = hook;
120
+
121
+ //event validation
122
+ const validatedEvents = validateEvents(events);
123
+ events = validatedEvents;
124
+ config.events = validatedEvents;
71
125
  global.MP_SIMULATION_CONFIG = config;
72
- const uuidChance = new Chance(seed);
126
+ CONFIG = config;
73
127
  const runId = uid(42);
74
- track('start simulation', {
75
- runId,
76
- seed,
77
- numEvents,
78
- numUsers,
79
- numDays,
80
- anonIds,
81
- sessionIds,
82
- format,
83
- targetToken: token,
84
- region,
85
- writeToDisk,
86
- isCLI,
87
- version
88
- });
128
+ let trackingParams = { runId, seed, numEvents, numUsers, numDays, anonIds, sessionIds, format, targetToken: token, region, writeToDisk, isCLI, version };
129
+ track('start simulation', trackingParams);
130
+
89
131
  log(`------------------SETUP------------------`);
90
132
  log(`\nyour data simulation will heretofore be known as: \n\n\t${simulationName.toUpperCase()}...\n`);
91
- log(`and your configuration is:\n\n`, JSON.stringify({ seed, numEvents, numUsers, numDays, format, token, region, writeToDisk }, null, 2));
133
+ log(`and your configuration is:\n\n`, JSON.stringify({ seed, numEvents, numUsers, numDays, format, token, region, writeToDisk, anonIds, sessionIds }, null, 2));
92
134
  log(`------------------SETUP------------------`, "\n");
93
135
 
94
-
95
- //the function which generates $distinct_id + $anonymous_ids, $session_ids, and $created, skewing towards the present
96
- function generateUser() {
97
- const distinct_id = uuidChance.guid();
98
- let z = u.boxMullerRandom();
99
- const skew = chance.normal({ mean: 10, dev: 3 });
100
- z = u.applySkew(z, skew);
101
-
102
- // Scale and shift the normally distributed value to fit the range of days
103
- const maxZ = u.integer(2, 4);
104
- const scaledZ = (z / maxZ + 1) / 2;
105
- const daysAgoBorn = Math.round(scaledZ * (numDays - 1)) + 1;
106
-
107
- return {
108
- distinct_id,
109
- ...u.person(daysAgoBorn),
110
- };
136
+ //setup all the data structures we will push into
137
+ const eventData = u.enrichArray([], { hook, type: "event", config });
138
+ const userProfilesData = u.enrichArray([], { hook, type: "user", config });
139
+ const scdTableKeys = Object.keys(scdProps);
140
+ const scdTableData = [];
141
+ for (const [index, key] of scdTableKeys.entries()) {
142
+ scdTableData[index] = u.enrichArray([], { hook, type: "scd", config, scdKey: key });
111
143
  }
112
-
113
- // weigh events for random selection
114
- const weightedEvents = events
115
- .reduce((acc, event) => {
116
- const weight = event.weight || 1;
117
- for (let i = 0; i < weight; i++) {
118
-
119
- acc.push(event);
144
+ const groupProfilesData = u.enrichArray([], { hook, type: "group", config });
145
+ const lookupTableData = u.enrichArray([], { hook, type: "lookup", config });
146
+ const avgEvPerUser = Math.ceil(numEvents / numUsers);
147
+
148
+ // if no funnels, make some out of events...
149
+ if (!funnels || !funnels.length) {
150
+ const createdFunnels = [];
151
+ const firstEvents = events.filter((e) => e.isFirstEvent).map((e) => e.event);
152
+ const usageEvents = events.filter((e) => !e.isFirstEvent).map((e) => e.event);
153
+ const numFunnelsToCreate = Math.ceil(usageEvents.length);
154
+ /** @type {Funnel} */
155
+ const funnelTemplate = {
156
+ sequence: [],
157
+ conversionRate: 50,
158
+ order: 'sequential',
159
+ props: {},
160
+ timeToConvert: 1,
161
+ isFirstFunnel: false,
162
+ weight: 1
163
+ };
164
+ if (firstEvents.length) {
165
+ for (const event of firstEvents) {
166
+ createdFunnels.push({ ...clone(funnelTemplate), sequence: [event], isFirstFunnel: true, conversionRate: 100 });
120
167
  }
121
- return acc;
122
- }, [])
168
+ }
123
169
 
124
- .filter((e) => !e.isFirstEvent);
170
+ //at least one funnel with all usage events
171
+ createdFunnels.push({ ...clone(funnelTemplate), sequence: usageEvents });
172
+
173
+ //for the rest, make random funnels
174
+ followUpFunnels: for (let i = 1; i < numFunnelsToCreate; i++) {
175
+ /** @type {Funnel} */
176
+ const funnel = { ...clone(funnelTemplate) };
177
+ funnel.conversionRate = u.integer(25, 75);
178
+ funnel.timeToConvert = u.integer(1, 10);
179
+ funnel.weight = u.integer(1, 10);
180
+ const sequence = u.shuffleArray(usageEvents).slice(0, u.integer(2, usageEvents.length));
181
+ funnel.sequence = sequence;
182
+ funnel.order = 'random';
183
+ createdFunnels.push(funnel);
184
+ }
185
+
186
+ funnels = createdFunnels;
187
+ config.funnels = funnels;
188
+ CONFIG = config;
125
189
 
126
- const firstEvents = events.filter((e) => e.isFirstEvent);
127
- const eventData = enrichArray([], { hook, type: "event", config });
128
- const userProfilesData = enrichArray([], { hook, type: "user", config });
129
- const scdTableKeys = Object.keys(scdProps);
130
- const scdTableData = [];
131
- for (const [index, key] of scdTableKeys.entries()) {
132
- scdTableData[index] = enrichArray([], { hook, type: "scd", config, scdKey: key });
133
190
  }
134
- // const scdTableData = enrichArray([], { hook, type: "scd", config });
135
- const groupProfilesData = enrichArray([], { hook, type: "groups", config });
136
- const lookupTableData = enrichArray([], { hook, type: "lookups", config });
137
- const avgEvPerUser = Math.floor(numEvents / numUsers);
138
191
 
139
192
  //user loop
140
193
  log(`---------------SIMULATION----------------`, "\n\n");
141
- for (let i = 1; i < numUsers + 1; i++) {
194
+ loopUsers: for (let i = 1; i < numUsers + 1; i++) {
142
195
  u.progress("users", i);
143
- const user = generateUser();
144
- const { distinct_id, $created, anonymousIds, sessionIds } = user;
145
- userProfilesData.hPush(makeProfile(userProps, user));
146
-
147
- //scd loop
196
+ const userId = chance.guid();
197
+ // const user = u.generateUser(userId, numDays, amp, freq, skew);
198
+ const user = u.generateUser(userId, numDays);
199
+ const { distinct_id, created, anonymousIds, sessionIds } = user;
200
+ let numEventsPreformed = 0;
201
+
202
+ // profile creation
203
+ const profile = makeProfile(userProps, user);
204
+ userProfilesData.hookPush(profile);
205
+
206
+ //scd creation
207
+ /** @type {Record<string, SCDTableRow[]>} */
208
+ // @ts-ignore
209
+ const userSCD = {};
148
210
  for (const [index, key] of scdTableKeys.entries()) {
149
211
  const mutations = chance.integer({ min: 1, max: 10 });
150
- scdTableData[index].hPush(makeSCD(scdProps[key], key, distinct_id, mutations, $created));
212
+ const changes = makeSCD(scdProps[key], key, distinct_id, mutations, created);
213
+ // @ts-ignore
214
+ userSCD[key] = changes;
215
+ scdTableData[index].hookPush(changes);
151
216
  }
152
217
 
153
- const numEventsThisUser = Math.round(
154
- chance.normal({ mean: avgEvPerUser, dev: avgEvPerUser / u.integer(3, 7) })
155
- );
218
+ let numEventsThisUserWillPreform = Math.floor(chance.normal({
219
+ mean: avgEvPerUser,
220
+ dev: avgEvPerUser / u.integer(u.integer(2, 5), u.integer(2, 7))
221
+ }) * 0.714159265359);
156
222
 
157
- if (firstEvents.length) {
158
- eventData.hPush(
159
- makeEvent(
160
- distinct_id,
161
- anonymousIds,
162
- sessionIds,
163
- dayjs($created).unix(),
164
- firstEvents,
165
- superProps,
166
- groupKeys,
167
- true
168
- )
169
- );
223
+ // power users do 5x more events
224
+ chance.bool({ likelihood: 20 }) ? numEventsThisUserWillPreform *= 5 : null;
225
+
226
+ // shitty users do 1/3 as many events
227
+ chance.bool({ likelihood: 15 }) ? numEventsThisUserWillPreform *= 0.333 : null;
228
+
229
+ numEventsThisUserWillPreform = Math.round(numEventsThisUserWillPreform);
230
+
231
+ let userFirstEventTime;
232
+
233
+ //first funnel
234
+ const firstFunnels = funnels.filter((f) => f.isFirstFunnel).reduce(u.weighFunnels, []);
235
+ const usageFunnels = funnels.filter((f) => !f.isFirstFunnel).reduce(u.weighFunnels, []);
236
+ const userIsBornInDataset = chance.bool({ likelihood: 30 });
237
+ if (firstFunnels.length && userIsBornInDataset) {
238
+ /** @type {Funnel} */
239
+ const firstFunnel = chance.pickone(firstFunnels, user);
240
+
241
+ const [data, userConverted] = makeFunnel(firstFunnel, user, profile, userSCD, null, config);
242
+ userFirstEventTime = dayjs(data[0].time).unix();
243
+ numEventsPreformed += data.length;
244
+ eventData.hookPush(data);
245
+ if (!userConverted) continue loopUsers;
170
246
  }
171
247
 
172
- //event loop
173
- for (let j = 0; j < numEventsThisUser; j++) {
174
- eventData.hPush(
175
- makeEvent(
176
- distinct_id,
177
- anonymousIds,
178
- sessionIds,
179
- dayjs($created).unix(),
180
- weightedEvents,
181
- superProps,
182
- groupKeys
183
- )
184
- );
248
+ while (numEventsPreformed < numEventsThisUserWillPreform) {
249
+ if (usageFunnels.length) {
250
+ /** @type {Funnel} */
251
+ const currentFunnel = chance.pickone(usageFunnels);
252
+ const [data, userConverted] = makeFunnel(currentFunnel, user, profile, userSCD, userFirstEventTime, config);
253
+ numEventsPreformed += data.length;
254
+ eventData.hookPush(data);
255
+ }
185
256
  }
257
+ // end individual user loop
186
258
  }
187
259
 
188
- //flatten SCD
260
+ //flatten SCD tables
189
261
  scdTableData.forEach((table, index) => scdTableData[index] = table.flat());
190
262
 
191
263
  log("\n");
@@ -202,9 +274,10 @@ async function main(config) {
202
274
  ...makeProfile(groupProps[groupKey]),
203
275
  // $distinct_id: i,
204
276
  };
277
+ group["distinct_id"] = i;
205
278
  groupProfiles.push(group);
206
279
  }
207
- groupProfilesData.hPush({ key: groupKey, data: groupProfiles });
280
+ groupProfilesData.hookPush({ key: groupKey, data: groupProfiles });
208
281
  }
209
282
  log("\n");
210
283
 
@@ -220,10 +293,36 @@ async function main(config) {
220
293
  };
221
294
  data.push(item);
222
295
  }
223
- lookupTableData.hPush({ key, data });
296
+ lookupTableData.hookPush({ key, data });
224
297
  }
225
298
 
226
- // deal with mirror props
299
+ // SHIFT TIME
300
+ const actualNow = dayjs();
301
+ const fixedNow = dayjs.unix(global.NOW);
302
+ const timeShift = actualNow.diff(fixedNow, "second");
303
+ const dayShift = actualNow.diff(global.NOW, "day");
304
+ eventData.forEach((event) => {
305
+ const newTime = dayjs(event.time).add(timeShift, "second");
306
+ event.time = newTime.toISOString();
307
+ if (epochStart && newTime.unix() < epochStart) event = {};
308
+ if (epochEnd && newTime.unix() > epochEnd) event = {};
309
+ });
310
+
311
+ userProfilesData.forEach((profile) => {
312
+ const newTime = dayjs(profile.created).add(dayShift, "day");
313
+ profile.created = newTime.toISOString();
314
+ });
315
+
316
+
317
+ // draw charts
318
+ if (makeChart) {
319
+ const bornEvents = config.events?.filter((e) => e.isFirstEvent)?.map(e => e.event) || [];
320
+ const bornFunnels = config.funnels?.filter((f) => f.isFirstFunnel)?.map(f => f.sequence[0]) || [];
321
+ const bornBehaviors = [...bornEvents, ...bornFunnels];
322
+ const chart = await generateLineChart(eventData, bornBehaviors, makeChart);
323
+ }
324
+
325
+ // create mirrorProps
227
326
  let mirrorEventData = [];
228
327
  const mirrorPropKeys = Object.keys(mirrorProps);
229
328
  if (mirrorPropKeys.length) {
@@ -237,7 +336,7 @@ async function main(config) {
237
336
  }
238
337
 
239
338
  const { eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder } =
240
- buildFileNames(config);
339
+ u.buildFileNames(config);
241
340
  const pairs = [
242
341
  [eventFiles, [eventData]],
243
342
  [userFiles, [userProfilesData]],
@@ -250,20 +349,7 @@ async function main(config) {
250
349
  log(`---------------SIMULATION----------------`, "\n");
251
350
 
252
351
  if (!writeToDisk && !token) {
253
- track('end simulation', {
254
- runId,
255
- seed,
256
- numEvents,
257
- numUsers,
258
- numDays,
259
- anonIds,
260
- sessionIds,
261
- format,
262
- token,
263
- region,
264
- writeToDisk,
265
- isCLI
266
- });
352
+ track('end simulation', trackingParams);
267
353
  return {
268
354
  eventData,
269
355
  userProfilesData,
@@ -271,12 +357,13 @@ async function main(config) {
271
357
  groupProfilesData,
272
358
  lookupTableData,
273
359
  mirrorEventData,
274
- import: {},
360
+ importResults: {},
275
361
  files: []
276
362
  };
277
363
  }
278
364
  log(`-----------------WRITES------------------`, `\n\n`);
279
- //write the files
365
+
366
+ let writeFilePromises = [];
280
367
  if (writeToDisk) {
281
368
  if (verbose) log(`writing files... for ${simulationName}`);
282
369
  loopFiles: for (const ENTITY of pairs) {
@@ -295,25 +382,16 @@ async function main(config) {
295
382
  log(`\twriting ${path}`);
296
383
  //if it's a lookup table, it's always a CSV
297
384
  if (format === "csv" || path.includes("-LOOKUP.csv")) {
298
- const columns = u.getUniqueKeys(TABLE);
299
- //papa parse needs eac nested field JSON stringified
300
- TABLE.forEach((e) => {
301
- for (const key in e) {
302
- if (typeof e[key] === "object") e[key] = JSON.stringify(e[key]);
303
- }
304
- });
305
-
306
- const csv = Papa.unparse(TABLE, { columns });
307
- await touch(path, csv);
385
+ writeFilePromises.push(u.streamCSV(path, TABLE));
308
386
  }
309
387
  else {
310
- const ndjson = TABLE.map((d) => JSON.stringify(d)).join("\n");
311
- await touch(path, ndjson, false);
388
+ writeFilePromises.push(u.streamJSON(path, TABLE));
312
389
  }
313
390
 
314
391
  }
315
392
  }
316
393
  }
394
+ const fileWriteResults = await Promise.all(writeFilePromises);
317
395
 
318
396
  const importResults = { events: {}, users: {}, groups: [] };
319
397
 
@@ -323,30 +401,28 @@ async function main(config) {
323
401
  const creds = { token };
324
402
  /** @type {import('mixpanel-import').Options} */
325
403
  const commonOpts = {
326
-
327
404
  region,
328
405
  fixData: true,
329
406
  verbose: false,
330
407
  forceStream: true,
331
- strict: false,
408
+ strict: true,
332
409
  dryRun: false,
333
410
  abridged: false,
411
+ fixJson: true,
412
+ showProgress: true
334
413
  };
335
414
 
336
415
  if (eventData) {
337
- log(`importing events to mixpanel...`);
416
+ log(`importing events to mixpanel...\n`);
338
417
  const imported = await mp(creds, eventData, {
339
418
  recordType: "event",
340
- fixData: true,
341
- fixJson: true,
342
- strict: false,
343
419
  ...commonOpts,
344
420
  });
345
421
  log(`\tsent ${comma(imported.success)} events\n`);
346
422
  importResults.events = imported;
347
423
  }
348
424
  if (userProfilesData) {
349
- log(`importing user profiles to mixpanel...`);
425
+ log(`importing user profiles to mixpanel...\n`);
350
426
  const imported = await mp(creds, userProfilesData, {
351
427
  recordType: "user",
352
428
  ...commonOpts,
@@ -358,134 +434,76 @@ async function main(config) {
358
434
  for (const groupProfiles of groupProfilesData) {
359
435
  const groupKey = groupProfiles.key;
360
436
  const data = groupProfiles.data;
361
- log(`importing ${groupKey} profiles to mixpanel...`);
437
+ log(`importing ${groupKey} profiles to mixpanel...\n`);
362
438
  const imported = await mp({ token, groupKey }, data, {
363
439
  recordType: "group",
364
440
  ...commonOpts,
441
+
365
442
  });
366
443
  log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);
367
444
 
368
445
  importResults.groups.push(imported);
369
446
  }
370
447
  }
371
-
372
448
  }
373
449
  log(`\n-----------------WRITES------------------`, "\n");
374
- track('end simulation', {
375
- runId,
376
- seed,
377
- numEvents,
378
- numUsers,
379
- numDays,
380
- events,
381
- anonIds,
382
- sessionIds,
383
- format,
384
- targetToken: token,
385
- region,
386
- writeToDisk,
387
- isCLI,
388
- version
389
- });
450
+ track('end simulation', trackingParams);
451
+
390
452
  return {
391
- import: importResults,
453
+ importResults,
392
454
  files: [eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder],
455
+ eventData,
456
+ userProfilesData,
457
+ scdTableData,
458
+ groupProfilesData,
459
+ lookupTableData,
460
+ mirrorEventData,
393
461
  };
394
462
  }
395
463
 
396
-
397
-
398
-
399
- function makeProfile(props, defaults) {
400
- //build the spec
401
- const profile = {
402
- ...defaults,
403
- };
404
-
405
- for (const key in props) {
406
- try {
407
- profile[key] = u.choose(props[key]);
408
- } catch (e) {
409
- // debugger;
410
- }
411
- }
412
-
413
- return profile;
414
- }
415
- /**
416
- * @param {import('./types.d.ts').valueValid} prop
417
- * @param {string} scdKey
418
- * @param {string} distinct_id
419
- * @param {number} mutations
420
- * @param {string} $created
421
- */
422
- function makeSCD(prop, scdKey, distinct_id, mutations, $created) {
423
- if (JSON.stringify(prop) === "{}") return {};
424
- if (JSON.stringify(prop) === "[]") return [];
425
- const scdEntries = [];
426
- let lastInserted = dayjs($created);
427
- const deltaDays = dayjs().diff(lastInserted, "day");
428
-
429
- for (let i = 0; i < mutations; i++) {
430
- if (lastInserted.isAfter(dayjs())) break;
431
- const scd = makeProfile({ [scdKey]: prop }, { distinct_id });
432
- scd.startTime = lastInserted.toISOString();
433
- lastInserted = lastInserted.add(u.integer(1, 1000), "seconds");
434
- scd.insertTime = lastInserted.toISOString();
435
- scdEntries.push({ ...scd });
436
- lastInserted = lastInserted
437
- .add(u.integer(0, deltaDays), "day")
438
- .subtract(u.integer(1, 1000), "seconds");
439
- }
440
-
441
- return scdEntries;
442
- }
443
-
444
464
  /**
445
465
  * creates a random event
446
466
  * @param {string} distinct_id
447
467
  * @param {string[]} anonymousIds
448
468
  * @param {string[]} sessionIds
449
469
  * @param {number} earliestTime
450
- * @param {Object[]} events
470
+ * @param {EventConfig} chosenEvent
451
471
  * @param {Object} superProps
452
472
  * @param {Object} groupKeys
453
473
  * @param {Boolean} isFirstEvent=false
454
474
  */
455
- function makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, events, superProps, groupKeys, isFirstEvent = false) {
456
- let chosenEvent = chance.pickone(events);
457
-
458
- //allow for a string shorthand
459
- if (typeof chosenEvent === "string") {
460
- chosenEvent = { event: chosenEvent, properties: {} };
461
- }
462
-
475
+ function makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, chosenEvent, superProps, groupKeys, isFirstEvent = false) {
476
+ const { mean = 0, dev = 2, peaks = 5 } = CONFIG.soup;
463
477
  //event model
464
- const event = {
478
+ const eventTemplate = {
465
479
  event: chosenEvent.event,
466
- $source: "AKsTimeSoup",
480
+ source: "dm4",
467
481
  };
468
482
 
469
483
  //event time
470
- if (isFirstEvent) event.time = dayjs.unix(earliestTime).toISOString();
471
- if (!isFirstEvent) event.time = AKsTimeSoup(earliestTime, NOW);
484
+ if (earliestTime > NOW) {
485
+ earliestTime = dayjs.unix(NOW).subtract(2, 'd').unix();
486
+ };
487
+
488
+ if (isFirstEvent) eventTemplate.time = dayjs.unix(earliestTime).toISOString();
489
+ if (!isFirstEvent) eventTemplate.time = u.TimeSoup(earliestTime, NOW, peaks, dev, mean);
472
490
 
473
491
  // anonymous and session ids
474
- if (global.MP_SIMULATION_CONFIG?.anonIds) event.$device_id = chance.pickone(anonymousIds);
475
- if (global.MP_SIMULATION_CONFIG?.sessionIds) event.$session_id = chance.pickone(sessionIds);
492
+ if (CONFIG?.anonIds) eventTemplate.device_id = CONFIG.chance.pickone(anonymousIds);
493
+ if (CONFIG?.sessionIds) eventTemplate.session_id = CONFIG.chance.pickone(sessionIds);
476
494
 
477
- //sometimes have a $user_id
478
- if (!isFirstEvent && chance.bool({ likelihood: 42 })) event.$user_id = distinct_id;
495
+ //sometimes have a user_id
496
+ if (!isFirstEvent && CONFIG.chance.bool({ likelihood: 42 })) eventTemplate.user_id = distinct_id;
479
497
 
480
- // ensure that there is a $user_id or $device_id
481
- if (!event.$user_id && !event.$device_id) event.$user_id = distinct_id;
498
+ // ensure that there is a user_id or device_id
499
+ if (!eventTemplate.user_id && !eventTemplate.device_id) eventTemplate.user_id = distinct_id;
482
500
 
483
501
  const props = { ...chosenEvent.properties, ...superProps };
484
502
 
485
503
  //iterate through custom properties
486
504
  for (const key in props) {
487
505
  try {
488
- event[key] = u.choose(props[key]);
506
+ eventTemplate[key] = u.choose(props[key]);
489
507
  } catch (e) {
490
508
  console.error(`error with ${key} in ${chosenEvent.event} event`, e);
491
509
  debugger;
@@ -496,72 +514,223 @@ function makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, events,
496
514
  for (const groupPair of groupKeys) {
497
515
  const groupKey = groupPair[0];
498
516
  const groupCardinality = groupPair[1];
517
+ const groupEvents = groupPair[2] || [];
499
518
 
500
- event[groupKey] = u.pick(u.weightedRange(1, groupCardinality));
519
+ // empty array for group events means all events
520
+ if (!groupEvents.length) eventTemplate[groupKey] = u.pick(u.weightedRange(1, groupCardinality));
521
+ if (groupEvents.includes(eventTemplate.event)) eventTemplate[groupKey] = u.pick(u.weightedRange(1, groupCardinality));
501
522
  }
502
523
 
503
524
  //make $insert_id
504
- event.$insert_id = md5(JSON.stringify(event));
525
+ eventTemplate.insert_id = md5(JSON.stringify(eventTemplate));
505
526
 
506
- return event;
527
+ return eventTemplate;
507
528
  }
508
529
 
509
- function buildFileNames(config) {
510
- const { format = "csv", groupKeys = [], lookupTables = [] } = config;
511
- const extension = format === "csv" ? "csv" : "json";
512
- // const current = dayjs.utc().format("MM-DD-HH");
513
- const simName = config.simulationName;
514
- let writeDir = "./";
515
- if (config.writeToDisk) writeDir = mkdir("./data");
516
-
517
- const writePaths = {
518
- eventFiles: [path.join(writeDir, `${simName}-EVENTS.${extension}`)],
519
- userFiles: [path.join(writeDir, `${simName}-USERS.${extension}`)],
520
- scdFiles: [],
521
- mirrorFiles: [path.join(writeDir, `${simName}-EVENTS-FUTURE-MIRROR.${extension}`)],
522
- groupFiles: [],
523
- lookupFiles: [],
524
- folder: writeDir,
525
- };
526
-
527
- //add SCD files
528
- const scdKeys = Object.keys(config?.scdProps || {});
529
- for (const key of scdKeys) {
530
- writePaths.scdFiles.push(
531
- path.join(writeDir, `${simName}-${key}-SCD.${extension}`)
532
- );
530
/**
 * creates a funnel of events for a user
 * this is called multiple times for a user
 *
 * Each step name in `funnel.sequence` is looked up in `config.events`; steps that
 * are not found fall back to a bare `{ event, properties: {} }` spec. The user
 * either completes every step or stops after a random prefix, and each emitted
 * event is timestamped relative to the first emitted event of the funnel.
 *
 * The `hook` from `config` is invoked twice: with the funnel ("funnel-pre")
 * before anything is generated, and with the generated events ("funnel-post")
 * just before returning. Return values of the hook are ignored here.
 *
 * @param {Funnel} funnel
 * @param {Person} user
 * @param {UserProfile} profile
 * @param {Record<string, SCDTableRow[]>} scd
 * @param {number | null} firstEventTime unix seconds; when falsy, the user's `created` time is used
 * @param {Config} config
 * @return {[EventSpec[], Boolean]} the generated events and whether the user converted
 */
function makeFunnel(funnel, user, profile, scd, firstEventTime, config) {
	const { hook } = config;
	hook(funnel, "funnel-pre", { user, profile, scd, funnel, config });
	const { sequence, conversionRate = 50, order = 'sequential', timeToConvert = 1, props } = funnel;
	const { distinct_id, created, anonymousIds, sessionIds } = user;
	const { superProps, groupKeys } = config;

	// funnel-level props are chosen once so every step of this funnel run shares them;
	// superProps are merged in unchosen and left for makeEvent to resolve per event
	const chosenFunnelProps = { ...props, ...superProps };
	for (const key in props) {
		try {
			chosenFunnelProps[key] = u.choose(chosenFunnelProps[key]);
		} catch (e) {
			console.error(`error with ${key} in ${funnel.sequence.join(" > ")} funnel`, e);
			debugger;
		}
	}

	const funnelPossibleEvents = sequence.map((event) => {
		const foundEvent = config.events.find((e) => e.event === event);
		// work on a copy: the entry in config.events is shared by every funnel and
		// every user, so choosing values / deleting keys on it in place would freeze
		// the property generators after the first call
		/** @type {EventConfig} */
		const eventSpec = foundEvent
			? { ...foundEvent, properties: { ...foundEvent.properties } }
			: { event, properties: {} };
		for (const key in eventSpec.properties) {
			try {
				eventSpec.properties[key] = u.choose(eventSpec.properties[key]);
			} catch (e) {
				console.error(`error with ${key} in ${eventSpec.event} event`, e);
				debugger;
			}
		}
		delete eventSpec.isFirstEvent;
		delete eventSpec.weight;
		eventSpec.properties = { ...eventSpec.properties, ...chosenFunnelProps };
		return eventSpec;
	});

	const doesUserConvert = config.chance.bool({ likelihood: conversionRate });
	let numStepsUserWillTake = sequence.length;
	// NOTE(review): for a single-step sequence this asks for u.integer(1, 0);
	// the outcome depends on how u.integer treats an inverted range — confirm
	if (!doesUserConvert) numStepsUserWillTake = u.integer(1, sequence.length - 1);
	const msInHour = 60000 * 60;

	let lastTimeJump = 0;
	const funnelActualEvents = funnelPossibleEvents.slice(0, numStepsUserWillTake)
		.map((event, index) => {
			if (index === 0) {
				event.relativeTimeMs = 0;
				return event;
			}

			// Calculate base increment for each step (timeToConvert is treated as hours)
			const baseIncrement = (timeToConvert * msInHour) / numStepsUserWillTake;

			// Introduce a random fluctuation factor
			const fluctuation = u.integer(-baseIncrement / u.integer(3, 5), baseIncrement / u.integer(3, 5));

			// Ensure the time increments are increasing and add randomness
			const previousTime = lastTimeJump;
			const currentTime = previousTime + baseIncrement + fluctuation;

			// Assign the calculated time to the event
			const chosenTime = Math.max(currentTime, previousTime + 1); // Ensure non-decreasing time
			lastTimeJump = chosenTime;
			event.relativeTimeMs = chosenTime;
			return event;
		});

	let funnelActualOrder = [];

	// relative offsets were assigned above, before any reordering happens here
	switch (order) {
		case "sequential":
			funnelActualOrder = funnelActualEvents;
			break;
		case "random":
			funnelActualOrder = u.shuffleArray(funnelActualEvents);
			break;
		case "first-fixed":
			funnelActualOrder = u.shuffleExceptFirst(funnelActualEvents);
			break;
		case "last-fixed":
			funnelActualOrder = u.shuffleExceptLast(funnelActualEvents);
			break;
		case "first-and-last-fixed":
			funnelActualOrder = u.fixFirstAndLast(funnelActualEvents);
			break;
		case "middle-fixed":
			funnelActualOrder = u.shuffleOutside(funnelActualEvents);
			break;
		default:
			funnelActualOrder = funnelActualEvents;
			break;
	}

	const earliestTime = firstEventTime || dayjs(created).unix();
	let funnelStartTime;
	const finalEvents = funnelActualOrder
		.map((event, index) => {
			const newEvent = makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, event, {}, groupKeys);
			if (index === 0) {
				// the first emitted event anchors the funnel; it keeps the time makeEvent gave it
				funnelStartTime = dayjs(newEvent.time);
				delete newEvent.relativeTimeMs;
				return newEvent;
			}
			newEvent.time = funnelStartTime.add(event.relativeTimeMs, "milliseconds").toISOString();
			delete newEvent.relativeTimeMs;
			return newEvent;
		});

	hook(finalEvents, "funnel-post", { user, profile, scd, funnel, config });
	return [finalEvents, doesUserConvert];
}
655
+
656
+
657
/**
 * Builds a profile object by layering generated property values over `defaults`.
 *
 * `anonymousIds` / `sessionIds` inherited from `defaults` are removed unless the
 * active simulation config (module-level `CONFIG`) enables `anonIds` / `sessionIds`.
 * Each entry of `props` is resolved through `u.choose`; a property whose generator
 * throws is reported and left off the profile rather than aborting the run.
 *
 * @param {Object} props property name -> value spec understood by `u.choose`
 * @param {Object} [defaults] base fields copied onto the profile first
 * @returns {Object} the assembled profile
 */
function makeProfile(props, defaults) {
	//build the spec
	const profile = { ...defaults };

	// anonymous and session ids
	if (!CONFIG?.anonIds) delete profile.anonymousIds;
	if (!CONFIG?.sessionIds) delete profile.sessionIds;

	for (const key in props) {
		try {
			profile[key] = u.choose(props[key]);
		} catch (e) {
			// stay best-effort, but surface the failure like makeEvent / makeFunnel do
			console.error(`error with ${key} in profile`, e);
		}
	}

	return profile;
}
552
677
 
678
/**
 * Generates the slowly-changing-dimension rows for one property of one user.
 *
 * Starting at `created`, up to `mutations` rows are produced. Each row is a
 * profile containing `distinct_id` and a freshly chosen value for `scdKey`,
 * stamped with `startTime` and a slightly later `insertTime`. Generation stops
 * early once the running timestamp passes the current wall-clock time.
 *
 * An empty prop spec (`{}` or `[]`) yields no rows. The result is always an
 * array so callers can push it into a table and flatten it.
 *
 * @param {import('./types.d.ts').ValueValid} prop
 * @param {string} scdKey
 * @param {string} distinct_id
 * @param {number} mutations
 * @param {string} created
 * @returns {Object[]} the generated SCD rows (possibly empty)
 */
function makeSCD(prop, scdKey, distinct_id, mutations, created) {
	const serialized = JSON.stringify(prop);
	if (serialized === "{}" || serialized === "[]") return [];

	const scdEntries = [];
	let lastInserted = dayjs(created);
	const deltaDays = dayjs().diff(lastInserted, "day");

	for (let i = 0; i < mutations; i++) {
		if (lastInserted.isAfter(dayjs())) break;
		const scd = makeProfile({ [scdKey]: prop }, { distinct_id });
		scd.startTime = lastInserted.toISOString();
		// insertTime trails startTime by up to 1000 seconds
		lastInserted = lastInserted.add(u.integer(1, 1000), "seconds");
		scd.insertTime = lastInserted.toISOString();
		scdEntries.push({ ...scd });
		// jump ahead a random number of days for the next mutation
		lastInserted = lastInserted
			.add(u.integer(0, deltaDays), "day")
			.subtract(u.integer(1, 1000), "seconds");
	}

	return scdEntries;
}
706
+
707
/**
 * Normalizes the `events` config into a list of event config objects.
 *
 * String entries are shorthand and are expanded into a full spec with no
 * properties, `isFirstEvent: false` and a random weight from `u.integer(1, 5)`.
 * Object entries are passed through as-is (same reference). Anything else,
 * including `null`, is dropped.
 *
 * @param {EventConfig[] | string[]} events
 * @returns {EventConfig[]}
 * @throws {Error} when `events` is not an array
 */
function validateEvents(events) {
	if (!Array.isArray(events)) throw new Error("events must be an array");
	const cleanEventConfig = [];
	for (const event of events) {
		if (typeof event === "string") {
			/** @type {EventConfig} */
			const eventTemplate = {
				event,
				isFirstEvent: false,
				properties: {},
				weight: u.integer(1, 5)
			};
			cleanEventConfig.push(eventTemplate);
		}
		// typeof null is "object"; a null entry would crash later on e.isFirstEvent
		else if (typeof event === "object" && event !== null) {
			cleanEventConfig.push(event);
		}
	}
	return cleanEventConfig;
}
730
+
731
+
560
732
 
561
- arr.hPush = transformThenPush;
562
733
 
563
- return arr;
564
- };
565
734
 
566
735
 
567
736
 
@@ -569,11 +738,12 @@ function enrichArray(arr = [], opts = {}) {
569
738
  if (require.main === module) {
570
739
  isCLI = true;
571
740
  const args = cliParams();
572
- const { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false } = args;
741
+ // @ts-ignore
742
+ let { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false, sessionIds, anonIds } = args;
743
+ // @ts-ignore
573
744
  const suppliedConfig = args._[0];
574
745
 
575
- //if the user specifics an separate config file
576
- //todo this text isn't displaying
746
+ //if the user specifies an separate config file
577
747
  let config = null;
578
748
  if (suppliedConfig) {
579
749
  console.log(`using ${suppliedConfig} for data\n`);
@@ -584,18 +754,19 @@ if (require.main === module) {
584
754
  console.log(`... using default COMPLEX configuration [everything] ...\n`);
585
755
  console.log(`... for more simple data, don't use the --complex flag ...\n`);
586
756
  console.log(`... or specify your own js config file (see docs or --help) ...\n`);
587
- config = require(path.resolve(__dirname, "./models/complex.js"));
757
+ config = require(path.resolve(__dirname, "./schemas/complex.js"));
588
758
  }
589
759
  else {
590
760
  console.log(`... using default SIMPLE configuration [events + users] ...\n`);
591
761
  console.log(`... for more complex data, use the --complex flag ...\n`);
592
- config = require(path.resolve(__dirname, "./models/simple.js"));
762
+ config = require(path.resolve(__dirname, "./schemas/simple.js"));
593
763
  }
594
764
  }
595
765
 
596
766
  //override config with cli params
597
767
  if (token) config.token = token;
598
768
  if (seed) config.seed = seed;
769
+ if (format === "csv" && config.format === "json") format = "json";
599
770
  if (format) config.format = format;
600
771
  if (numDays) config.numDays = numDays;
601
772
  if (numUsers) config.numUsers = numUsers;
@@ -603,6 +774,8 @@ if (require.main === module) {
603
774
  if (region) config.region = region;
604
775
  if (writeToDisk) config.writeToDisk = writeToDisk;
605
776
  if (writeToDisk === 'false') config.writeToDisk = false;
777
+ if (sessionIds) config.sessionIds = sessionIds;
778
+ if (anonIds) config.anonIds = anonIds;
606
779
  config.verbose = true;
607
780
 
608
781
  main(config)
@@ -610,7 +783,7 @@ if (require.main === module) {
610
783
  log(`-----------------SUMMARY-----------------`);
611
784
  const d = { success: 0, bytes: 0 };
612
785
  const darr = [d];
613
- const { events = d, groups = darr, users = d } = data.import;
786
+ const { events = d, groups = darr, users = d } = data.importResults;
614
787
  const files = data.files;
615
788
  const folder = files?.pop();
616
789
  const groupBytes = groups.reduce((acc, group) => {
@@ -643,7 +816,6 @@ if (require.main === module) {
643
816
  });
644
817
  } else {
645
818
  main.utils = { ...u };
646
- main.timeSoup = AKsTimeSoup;
647
819
  module.exports = main;
648
820
  }
649
821