make-mp-data 1.4.5 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/scripts/new.sh CHANGED
@@ -13,23 +13,39 @@ const dayjs = require("dayjs");
13
13
  const utc = require("dayjs/plugin/utc");
14
14
  dayjs.extend(utc);
15
15
  require("dotenv").config();
16
- const u = require("../core/utils");
16
+ const u = require("../src/utils");
17
17
  const v = require("ak-tools");
18
18
  const chance = u.initChance(SEED);
19
+ const num_users = 25_000
20
+ const days = 100
19
21
 
20
22
  /** @type {import("../types").Config} */
21
23
  const config = {
22
24
  token: "",
23
25
  seed: SEED,
24
- numDays: 100, //how many days worth of data
25
- numEvents: 100000, //how many events
26
- numUsers: 1000, //how many users
27
- format: "csv", //csv or json
28
- region: "US",
29
- makeChart: false,
30
- anonIds: false,
31
- sessionIds: false,
32
- writeToDisk: false,
26
+ numDays: days,
27
+ numEvents: num_users * 100,
28
+ numUsers: num_users,
29
+ hasAnonIds: true,
30
+ hasSessionIds: true,
31
+
32
+ hasLocation: true,
33
+ hasAndroidDevices: true,
34
+ hasIOSDevices: true,
35
+ hasDesktopDevices: true,
36
+ hasBrowser: true,
37
+ hasCampaigns: true,
38
+ isAnonymous: false,
39
+ hasAdSpend: true,
40
+
41
+ hasAvatar: true,
42
+ makeChart: false,
43
+
44
+ batchSize: 500_000,
45
+ concurrency: 500,
46
+ writeToDisk: false,
47
+
48
+ funnels: [],
33
49
  events: [],
34
50
  superProps: {},
35
51
  userProps: {},
@@ -15,7 +15,6 @@ by ak@mixpanel.com
15
15
 
16
16
  function cliParams() {
17
17
  console.log(hero);
18
- // @ts-ignore
19
18
  const args = yargs(process.argv.splice(2))
20
19
  .scriptName("make-mp-data")
21
20
  .usage(`\nusage:\nnpx $0 [dataModel.js] [options]
@@ -85,6 +84,13 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
85
84
  describe: 'either US or EU',
86
85
  type: 'string'
87
86
  })
87
+ .option('concurrency', {
88
+ alias: 'conn',
89
+ default: 500,
90
+ demandOption: false,
91
+ describe: 'concurrency level for data generation',
92
+ type: 'number'
93
+ })
88
94
  .options("complex", {
89
95
  demandOption: false,
90
96
  default: false,
@@ -101,7 +107,7 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
101
107
  type: 'boolean',
102
108
  coerce: boolCoerce
103
109
  })
104
- .option("sessionIds", {
110
+ .option("hasSessionIds", {
105
111
  demandOption: false,
106
112
  default: false,
107
113
  describe: 'create session ids in the data',
@@ -109,7 +115,7 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
109
115
  type: 'boolean',
110
116
  coerce: boolCoerce
111
117
  })
112
- .option("anonIds", {
118
+ .option("hasAnonIds", {
113
119
  demandOption: false,
114
120
  default: false,
115
121
  describe: 'create anonymous ids in the data',
@@ -187,6 +193,7 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
187
193
  type: 'boolean',
188
194
  coerce: boolCoerce
189
195
  })
196
+
190
197
  .help()
191
198
  .wrap(null)
192
199
  .argv;
@@ -15,7 +15,7 @@ const { domainSuffix, domainPrefix } = require('./defaults');
15
15
  /** @typedef {import('../types').Config} Config */
16
16
  /** @typedef {import('../types').EventConfig} EventConfig */
17
17
  /** @typedef {import('../types').ValueValid} ValueValid */
18
- /** @typedef {import('../types').EnrichedArray} hookArray */
18
+ /** @typedef {import('../types').HookedArray} hookArray */
19
19
  /** @typedef {import('../types').hookArrayOptions} hookArrayOptions */
20
20
  /** @typedef {import('../types').Person} Person */
21
21
  /** @typedef {import('../types').Funnel} Funnel */
@@ -40,7 +40,6 @@ function initChance(seed) {
40
40
  if (process.env.SEED) seed = process.env.SEED; // Override seed with environment variable if available
41
41
  if (!chanceInitialized) {
42
42
  globalChance = new Chance(seed);
43
- if (global.MP_SIMULATION_CONFIG) global.MP_SIMULATION_CONFIG.chance = globalChance;
44
43
  chanceInitialized = true;
45
44
  }
46
45
  return globalChance;
@@ -52,11 +51,11 @@ function initChance(seed) {
52
51
  */
53
52
  function getChance() {
54
53
  if (!chanceInitialized) {
55
- const seed = process.env.SEED || global.MP_SIMULATION_CONFIG?.seed;
54
+ const seed = process.env.SEED || "";
56
55
  if (!seed) {
57
- return new Chance();
56
+ return new Chance(); // this is a new RNG and therefore not deterministic
58
57
  }
59
- return initChance(seed);
58
+ return initChance(seed);
60
59
  }
61
60
  return globalChance;
62
61
  }
@@ -312,52 +311,6 @@ function range(a, b, step = 1) {
312
311
  };
313
312
 
314
313
 
315
- /**
316
- * create funnels out of random events
317
- * @param {EventConfig[]} events
318
- */
319
- function inferFunnels(events) {
320
- const createdFunnels = [];
321
- const firstEvents = events.filter((e) => e.isFirstEvent).map((e) => e.event);
322
- const usageEvents = events.filter((e) => !e.isFirstEvent).map((e) => e.event);
323
- const numFunnelsToCreate = Math.ceil(usageEvents.length);
324
- /** @type {Funnel} */
325
- const funnelTemplate = {
326
- sequence: [],
327
- conversionRate: 50,
328
- order: 'sequential',
329
- requireRepeats: false,
330
- props: {},
331
- timeToConvert: 1,
332
- isFirstFunnel: false,
333
- weight: 1
334
- };
335
- if (firstEvents.length) {
336
- for (const event of firstEvents) {
337
- createdFunnels.push({ ...clone(funnelTemplate), sequence: [event], isFirstFunnel: true, conversionRate: 100 });
338
- }
339
- }
340
-
341
- //at least one funnel with all usage events
342
- createdFunnels.push({ ...clone(funnelTemplate), sequence: usageEvents });
343
-
344
- //for the rest, make random funnels
345
- followUpFunnels: for (let i = 1; i < numFunnelsToCreate; i++) {
346
- /** @type {Funnel} */
347
- const funnel = { ...clone(funnelTemplate) };
348
- funnel.conversionRate = integer(25, 75);
349
- funnel.timeToConvert = integer(1, 10);
350
- funnel.weight = integer(1, 10);
351
- const sequence = shuffleArray(usageEvents).slice(0, integer(2, usageEvents.length));
352
- funnel.sequence = sequence;
353
- funnel.order = 'random';
354
- createdFunnels.push(funnel);
355
- }
356
-
357
- return createdFunnels;
358
-
359
- }
360
-
361
314
 
362
315
  /*
363
316
  ----
@@ -674,7 +627,7 @@ function validateEventConfig(events) {
674
627
  return cleanEventConfig;
675
628
  }
676
629
 
677
- function validateTime(chosenTime, earliestTime, latestTime) {
630
+ function validTime(chosenTime, earliestTime, latestTime) {
678
631
  if (!earliestTime) earliestTime = global.NOW - (60 * 60 * 24 * 30); // 30 days ago
679
632
  if (!latestTime) latestTime = global.NOW;
680
633
 
@@ -691,6 +644,17 @@ function validateTime(chosenTime, earliestTime, latestTime) {
691
644
  return false;
692
645
  }
693
646
 
647
+ function validEvent(row) {
648
+ if (!row) return false;
649
+ if (!row.event) return false;
650
+ if (!row.time) return false;
651
+ if (!row.device_id && !row.user_id) return false;
652
+ if (!row.insert_id) return false;
653
+ if (!row.source) return false;
654
+ if (typeof row.time !== 'string') return false;
655
+ return true;
656
+ }
657
+
694
658
 
695
659
  /*
696
660
  ----
@@ -698,68 +662,7 @@ META
698
662
  ----
699
663
  */
700
664
 
701
- /**
702
- * our meta programming function which lets you mutate items as they are pushed into an array
703
- * @param {any[]} arr
704
- * @param {hookArrayOptions} opts
705
- * @returns {hookArray}}
706
- */
707
- function hookArray(arr = [], opts = {}) {
708
- const { hook = a => a, type = "", ...rest } = opts;
709
-
710
- function transformThenPush(item) {
711
- if (item === null) return false;
712
- if (item === undefined) return false;
713
- if (typeof item === 'object') {
714
- if (Object.keys(item).length === 0) return false;
715
- }
716
-
717
- //hook is passed an array
718
- if (Array.isArray(item)) {
719
- for (const i of item) {
720
- try {
721
- const enriched = hook(i, type, rest);
722
- if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
723
- else arr.push(enriched);
724
-
725
- }
726
- catch (e) {
727
- console.error(`\n\nyour hook had an error\n\n`, e);
728
- arr.push(i);
729
- return false;
730
- }
731
-
732
- }
733
- return true;
734
- }
735
-
736
- //hook is passed a single item
737
- else {
738
- try {
739
- const enriched = hook(item, type, rest);
740
- if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
741
- else arr.push(enriched);
742
- return true;
743
- }
744
- catch (e) {
745
- console.error(`\n\nyour hook had an error\n\n`, e);
746
- arr.push(item);
747
- return false;
748
- }
749
- }
750
-
751
- }
752
-
753
- /** @type {hookArray} */
754
- // @ts-ignore
755
- const enrichedArray = arr;
756
-
757
665
 
758
- enrichedArray.hookPush = transformThenPush;
759
-
760
-
761
- return enrichedArray;
762
- };
763
666
 
764
667
  /**
765
668
  * @param {Config} config
@@ -770,7 +673,7 @@ function buildFileNames(config) {
770
673
  extension = format === "csv" ? "csv" : "json";
771
674
  // const current = dayjs.utc().format("MM-DD-HH");
772
675
  let simName = config.simulationName;
773
- let writeDir = "./";
676
+ let writeDir = typeof config.writeToDisk === 'string' ? config.writeToDisk : "./";
774
677
  if (config.writeToDisk) {
775
678
  const dataFolder = path.resolve("./data");
776
679
  if (existsSync(dataFolder)) writeDir = dataFolder;
@@ -835,7 +738,6 @@ function buildFileNames(config) {
835
738
  * @param {[string, number][]} arrayOfArrays
836
739
  */
837
740
  function progress(arrayOfArrays) {
838
- // @ts-ignore
839
741
  readline.cursorTo(process.stdout, 0);
840
742
  let message = "";
841
743
  for (const status of arrayOfArrays) {
@@ -873,8 +775,9 @@ CORE
873
775
  */
874
776
 
875
777
  //the function which generates $distinct_id + $anonymous_ids, $session_ids, and created, skewing towards the present
876
- function generateUser(user_id, numDays, amplitude = 1, frequency = 1, skew = 1) {
778
+ function generateUser(user_id, opts, amplitude = 1, frequency = 1, skew = 1) {
877
779
  const chance = getChance();
780
+ const { numDays, isAnonymous, hasAvatar, hasAnonIds, hasSessionIds } = opts;
878
781
  // Uniformly distributed `u`, then skew applied
879
782
  let u = Math.pow(chance.random(), skew);
880
783
 
@@ -886,16 +789,18 @@ function generateUser(user_id, numDays, amplitude = 1, frequency = 1, skew = 1)
886
789
 
887
790
  // Clamp values to ensure they are within the desired range
888
791
  daysAgoBorn = Math.min(daysAgoBorn, numDays);
792
+ const props = person(user_id, daysAgoBorn, isAnonymous, hasAvatar, hasAnonIds, hasSessionIds);
889
793
 
890
794
  const user = {
891
795
  distinct_id: user_id,
892
- ...person(numDays),
796
+ ...props,
893
797
  };
894
798
 
895
799
 
896
800
  return user;
897
801
  }
898
802
 
803
+ let soupHits = 0;
899
804
  /**
900
805
  * build sign waves basically
901
806
  * @param {number} [earliestTime]
@@ -921,8 +826,9 @@ function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0)
921
826
  let isValidTime = false;
922
827
  do {
923
828
  iterations++;
829
+ soupHits++;
924
830
  offset = chance.normal({ mean: mean, dev: chunkSize / deviation });
925
- isValidTime = validateTime(chunkMid + offset, earliestTime, latestTime);
831
+ isValidTime = validTime(chunkMid + offset, earliestTime, latestTime);
926
832
  if (iterations > 25000) {
927
833
  throw `${iterations} iterations... exceeded`;
928
834
  }
@@ -946,15 +852,17 @@ function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0)
946
852
  * @param {string} userId
947
853
  * @param {number} bornDaysAgo=30
948
854
  * @param {boolean} isAnonymous
855
+ * @param {boolean} hasAvatar
856
+ * @param {boolean} hasAnonIds
857
+ * @param {boolean} hasSessionIds
949
858
  * @return {Person}
950
859
  */
951
- function person(userId, bornDaysAgo = 30, isAnonymous = false) {
860
+ function person(userId, bornDaysAgo = 30, isAnonymous = false, hasAvatar = false, hasAnonIds = false, hasSessionIds = false) {
952
861
  const chance = getChance();
953
862
  //names and photos
954
863
  const l = chance.letter.bind(chance);
955
864
  let gender = chance.pickone(['male', 'female']);
956
865
  if (!gender) gender = "female";
957
- // @ts-ignore
958
866
  let first = chance.first({ gender });
959
867
  let last = chance.last();
960
868
  let name = `${first} ${last}`;
@@ -982,21 +890,23 @@ function person(userId, bornDaysAgo = 30, isAnonymous = false) {
982
890
  user.name = "Anonymous User";
983
891
  user.email = l() + l() + `*`.repeat(integer(3, 6)) + l() + `@` + l() + `*`.repeat(integer(3, 6)) + l() + `.` + choose(domainSuffix);
984
892
  delete user.avatar;
985
-
986
893
  }
987
894
 
895
+ if (!hasAvatar) delete user.avatar;
896
+
988
897
  //anon Ids
989
- if (global.MP_SIMULATION_CONFIG?.anonIds) {
898
+ if (hasAnonIds) {
990
899
  const clusterSize = integer(2, 10);
991
900
  for (let i = 0; i < clusterSize; i++) {
992
901
  const anonId = uid(42);
993
902
  user.anonymousIds.push(anonId);
994
903
  }
995
-
996
904
  }
997
905
 
906
+ if (!hasAnonIds) delete user.anonymousIds;
907
+
998
908
  //session Ids
999
- if (global.MP_SIMULATION_CONFIG?.sessionIds) {
909
+ if (hasSessionIds) {
1000
910
  const sessionSize = integer(5, 30);
1001
911
  for (let i = 0; i < sessionSize; i++) {
1002
912
  const sessionId = [uid(5), uid(5), uid(5), uid(5)].join("-");
@@ -1004,6 +914,8 @@ function person(userId, bornDaysAgo = 30, isAnonymous = false) {
1004
914
  }
1005
915
  }
1006
916
 
917
+ if (!hasSessionIds) delete user.sessionIds;
918
+
1007
919
  return user;
1008
920
  };
1009
921
 
@@ -1078,7 +990,10 @@ module.exports = {
1078
990
 
1079
991
  initChance,
1080
992
  getChance,
1081
- validateTime,
993
+
994
+ validTime,
995
+ validEvent,
996
+
1082
997
  boxMullerRandom,
1083
998
  applySkew,
1084
999
  mapToRange,
@@ -1100,12 +1015,10 @@ module.exports = {
1100
1015
  shuffleOutside,
1101
1016
  interruptArray,
1102
1017
  generateUser,
1103
- hookArray,
1104
1018
  optimizedBoxMuller,
1105
1019
  buildFileNames,
1106
1020
  streamJSON,
1107
1021
  streamCSV,
1108
- inferFunnels,
1109
1022
  datesBetween,
1110
1023
  weighChoices
1111
1024
  };
@@ -0,0 +1,52 @@
1
+ /*
2
+ ----
3
+ TO DOs
4
+ ----
5
+ */
6
+
7
+ //!feature: fixedTimeFunnel? if set this funnel will occur for all users at the same time ['cards charged', 'charge complete']
8
+ //!feature: churn ... is churnFunnel, possible to return, etc
9
+ //!feature: send SCD data to mixpanel (blocked on dev)
10
+ //!feature: send and map lookup tables to mixpanel (also blocked on dev)
11
+ //!bug: using --mc flag reverts to --complex for some reason
12
+
13
+
14
+ import main from "../../index.js";
15
+ import simple from '../../schemas/simple.js';
16
+
17
+ /** @typedef {import('../../types').Config} Config */
18
+
19
+ /** @type {Config} */
20
+ const noWrites = {
21
+ ...simple,
22
+ numUsers: 10_000,
23
+ numEvents: 250_000,
24
+ writeToDisk: false,
25
+ };
26
+
27
+ /** @type {Config} */
28
+ const yesWrites = {
29
+ ...noWrites,
30
+ writeToDisk: true
31
+ };
32
+
33
+ console.log('concurrency benchmarking');
34
+
35
+ const concurrency = [1, 2, 3, 4, 5];
36
+
37
+ const results = [];
38
+ for (const concurrent of concurrency) {
39
+ console.log(`concurrency: ${concurrent}`);
40
+ // @ts-ignore
41
+ const test = await main({ ...noWrites, concurrency: concurrent });
42
+ results.push({ human: test.time.human, concurrency: concurrent });
43
+ console.log(`\t\tdone: ${test.time.human}\n\n`);
44
+ }
45
+
46
+ const display = results.map((r) => {
47
+ return `concurrency: ${r.concurrency} | duration: ${r.human}`;
48
+ });
49
+
50
+ console.log(display.join('\n\n'));
51
+
52
+ debugger;