make-mp-data 1.2.21 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,18 @@
4
4
  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
5
  "version": "0.2.0",
6
6
  "configurations": [
7
+ {
8
+ "command": "npm run simple",
9
+ "name": "simple",
10
+ "request": "launch",
11
+ "type": "node-terminal"
12
+ },
13
+ {
14
+ "command": "npm run complex",
15
+ "name": "complex",
16
+ "request": "launch",
17
+ "type": "node-terminal"
18
+ },
7
19
  {
8
20
  "type": "node",
9
21
  "request": "launch",
@@ -15,5 +27,6 @@
15
27
  "internalConsoleOptions": "neverOpen",
16
28
  "args": ["--ignore", "./data/"]
17
29
  }
30
+
18
31
  ]
19
32
  }
@@ -1,7 +1,25 @@
1
1
  {
2
2
  "cSpell.words": [
3
+ "AVCHD",
4
+ "chupacabra",
5
+ "darr",
6
+ "dislikers",
7
+ "Dont",
8
+ "jackalope",
9
+ "mbembe",
10
+ "megalodon",
11
+ "Miley",
12
+ "mokele",
13
+ "mothman",
14
+ "nessie",
15
+ "nesty",
16
+ "planthopper",
17
+ "psyllid",
18
+ "tatzelwurm",
3
19
  "timesoup",
4
- "unparse"
20
+ "unparse",
21
+ "Vlogs",
22
+ "weindgo"
5
23
  ],
6
24
  "jest.runMode": "on-demand",
7
25
  "jest.jestCommandLine": "npm run test --",
package/README.md CHANGED
@@ -1,50 +1,103 @@
1
1
 
2
-
3
2
  # Make Mixpanel Data
4
- a quick and dirty CLI in node.js to generate fake data for mixpanel.
5
3
 
6
- ## tldr;
4
+ ## 🤖 What is this?
5
+
6
+ Generate fake Mixpanel data _quickly_ and _easily_ with a simple CLI in Node.js. **Create events, user profiles, group profiles, SCD data, mirror tables, and lookup tables**; basically everything you need for testing + demoing Mixpanel analysis features.
7
+
8
+ the CLI looks like this:
9
+
10
+ ![Generate Mixpanel data](https://aktunes.neocities.org/makeDataDemo.gif)
11
+
12
+ under the hood, `make-mp-data` models data that adheres to [Mixpanel's data model](https://docs.mixpanel.com/docs/data-structure/concepts), giving you the tools you need for robust, realistic, field-ready test data.
13
+
14
+ ## 🚀 Quick Start
15
+
16
+ ### Basic Usage
17
+
18
+ Generate events and users, and write them to CSV files:
7
19
 
8
-
9
- ```bash
20
+ ```bash
10
21
  npx make-mp-data
11
22
  ```
12
- - makes events + users (and writes them to CSVs)
13
23
 
14
- ```bash
15
- npx make-mp-data --numUsers 100 --numEvents 10000 --days 90 --format json
24
+ ### Customizing Output
25
+
26
+ Generate 10,000 events and 100 users over the last 90 days, and write the output as JSON:
27
+
28
+ ```bash
29
+ npx make-mp-data --numUsers 100 --numEvents 10000 --numDays 90 --format json
16
30
  ```
17
- - makes ~10k events + 100 users from the last 90 days (but writes JSON)
18
31
 
32
+ ### Complex Data Models
19
33
 
20
- ```bash
34
+ Create a comprehensive dataset including events, users, groups, SCD, and lookup tables:
35
+
36
+ ```bash
21
37
  npx make-mp-data --complex
22
38
  ```
23
- - makes events + users + groups + scd + lookup tables
24
- - this includes every type of data that mixpanel supports
39
+
40
+ ### Send Data to Mixpanel
41
+
42
+ Generate and send data directly to Mixpanel using your project token:
25
43
 
26
44
  ```bash
27
45
  npx make-mp-data --token 1234
28
46
  ```
29
- - makes events + users (and send them to mixpanel)
47
+
48
+ ### Help and Options
49
+
50
+ Need more info on available options?
30
51
 
31
52
  ```bash
32
53
  npx make-mp-data --help
33
54
  ```
34
- - explains all the options you can specify
35
55
 
36
- ## customization
56
+ ## 🔧 Schema
57
+
58
+ To choose your own event and property names and values, specify a custom data model and additional options:
37
59
 
38
60
  ```bash
39
61
  npx make-mp-data [dataModel.js] [options]
40
62
  ```
41
- ex.
63
+
64
+ Example:
42
65
 
43
66
  ```bash
44
67
  npx make-mp-data ecommSpec.js --token 1234 --numDays 30 --numUsers 1000 --numEvents 1000000
45
68
  ```
46
69
 
47
- see `--help` for a full list of options
70
+ ### Data Models
71
+
72
+ Check out `./models/` for example `dataModel.js` files to get started quickly.
73
+
74
+ ## 🛠️ CLI Options
75
+
76
+ Here's a breakdown of the CLI options you can use with `make-mp-data`:
77
+
78
+ - `--numUsers`: Number of users to generate.
79
+ - `--numEvents`: Number of events to generate.
80
+ - `--numDays`: Number of days over which to spread the generated data.
81
+ - `--format`: Output format (`csv` or `json`).
82
+ - `--token`: Mixpanel project token for direct data import.
83
+ - `--region`: Mixpanel data region (`US`, `EU`).
84
+ - `--writeToDisk`: Whether to write the data to disk (`true` or `false`).
85
+ - `--verbose`: Enable verbose logging.
86
+ - `--complex`: Create a complex set of models, including groups, SCD, and lookup tables.
87
+ - `--simple`: Create a simple dataset including events and users.
88
+
89
+ ## 📄 Examples
90
+
91
+ Check out the examples directory for sample data models:
92
+
93
+ ```bash
94
+ ls ./models/
95
+ ```
96
+
97
+ These models provide a great starting point for creating your own custom data generation scripts.
98
+
99
+ ## 🤝 Contributing
48
100
 
49
- see `./models/` for a few `dataModel.js` examples...
101
+ Feel free to fork this repository and submit pull requests. Contributions are always welcome!
50
102
 
103
+ For any issues or feature requests, please create an issue on the [GitHub repository](https://github.com/ak--47/make-mp-data/issues).
package/index.js CHANGED
@@ -1,6 +1,5 @@
1
1
  #! /usr/bin/env node
2
2
 
3
-
4
3
  /*
5
4
  make fake mixpanel data easily!
6
5
  by AK
@@ -20,9 +19,16 @@ const dayjs = require("dayjs");
20
19
  const utc = require("dayjs/plugin/utc");
21
20
  dayjs.extend(utc);
22
21
  const cliParams = require("./cli.js");
23
- const { makeName, md5 } = require('ak-tools');
22
+ const { makeName, md5, clone, tracker, uid } = require('ak-tools');
24
23
  const NOW = dayjs().unix();
25
24
  let VERBOSE = false;
25
+ let isCLI = false;
26
+
27
+ const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41");
28
/**
 * Forward a tracking call to the module-level `metrics` tracker.
 *
 * Does nothing when `process.env.NODE_ENV` is exactly "test", so test runs
 * never reach `metrics.track`.
 *
 * @param {...any} args - passed through, unchanged and in order, to `metrics.track`
 * @returns {void}
 */
function track(...args) {
	if (process.env.NODE_ENV === 'test') return;
	// rest parameters instead of the legacy `arguments` object
	metrics.track(...args);
}
26
32
 
27
33
  /** @typedef {import('./types.d.ts').Config} Config */
28
34
  /** @typedef {import('./types.d.ts').EventConfig} EventConfig */
@@ -41,26 +47,45 @@ async function main(config) {
41
47
  superProps = { platform: ["web", "iOS", "Android"] },
42
48
  userProps = {
43
49
  favoriteColor: ["red", "green", "blue", "yellow"],
44
- spiritAnimal: chance.animal,
50
+ spiritAnimal: chance.animal.bind(chance),
45
51
  },
46
- scdProps = { NPS: u.weightedRange(0, 10, 150, 1.6) },
52
+ scdProps = {},
53
+ mirrorProps = {},
47
54
  groupKeys = [],
48
55
  groupProps = {},
49
56
  lookupTables = [],
50
- anonIds = true,
51
- sessionIds = true,
57
+ anonIds = false,
58
+ sessionIds = false,
52
59
  format = "csv",
53
60
  token = null,
54
61
  region = "US",
55
62
  writeToDisk = false,
56
63
  verbose = false,
64
+ hook = (record) => record,
57
65
  } = config;
58
66
  VERBOSE = verbose;
59
67
  config.simulationName = makeName();
68
+ const { simulationName } = config;
60
69
  global.MP_SIMULATION_CONFIG = config;
61
70
  const uuidChance = new Chance(seed);
71
+ const runId = uid(42);
72
+ track('start simulation', {
73
+ runId,
74
+ seed,
75
+ numEvents,
76
+ numUsers,
77
+ numDays,
78
+ events,
79
+ anonIds,
80
+ sessionIds,
81
+ format,
82
+ token,
83
+ region,
84
+ writeToDisk,
85
+ isCLI
86
+ });
62
87
  log(`------------------SETUP------------------`);
63
- log(`\nyour data simulation will heretofore be known as: \n\n\t${config.simulationName.toUpperCase()}...\n`);
88
+ log(`\nyour data simulation will heretofore be known as: \n\n\t${simulationName.toUpperCase()}...\n`);
64
89
  log(`and your configuration is:\n\n`, JSON.stringify({ seed, numEvents, numUsers, numDays, format, token, region, writeToDisk }, null, 2));
65
90
  log(`------------------SETUP------------------`, "\n");
66
91
 
@@ -97,11 +122,16 @@ async function main(config) {
97
122
  .filter((e) => !e.isFirstEvent);
98
123
 
99
124
  const firstEvents = events.filter((e) => e.isFirstEvent);
100
- const eventData = [];
101
- const userProfilesData = [];
102
- let scdTableData = [];
103
- const groupProfilesData = [];
104
- const lookupTableData = [];
125
+ const eventData = enrichArray([], { hook, type: "event", config });
126
+ const userProfilesData = enrichArray([], { hook, type: "user", config });
127
+ const scdTableKeys = Object.keys(scdProps);
128
+ const scdTableData = [];
129
+ for (const [index, key] of scdTableKeys.entries()) {
130
+ scdTableData[index] = enrichArray([], { hook, type: "scd", config, scdKey: key });
131
+ }
132
+ // const scdTableData = enrichArray([], { hook, type: "scd", config });
133
+ const groupProfilesData = enrichArray([], { hook, type: "groups", config });
134
+ const lookupTableData = enrichArray([], { hook, type: "lookups", config });
105
135
  const avgEvPerUser = Math.floor(numEvents / numUsers);
106
136
 
107
137
  //user loop
@@ -110,15 +140,20 @@ async function main(config) {
110
140
  u.progress("users", i);
111
141
  const user = generateUser();
112
142
  const { distinct_id, $created, anonymousIds, sessionIds } = user;
113
- userProfilesData.push(makeProfile(userProps, user));
114
- const mutations = chance.integer({ min: 1, max: 10 });
115
- scdTableData.push(makeSCD(scdProps, distinct_id, mutations, $created));
143
+ userProfilesData.hPush(makeProfile(userProps, user));
144
+
145
+ //scd loop
146
+ for (const [index, key] of scdTableKeys.entries()) {
147
+ const mutations = chance.integer({ min: 1, max: 10 });
148
+ scdTableData[index].hPush(makeSCD(scdProps[key], key, distinct_id, mutations, $created));
149
+ }
150
+
116
151
  const numEventsThisUser = Math.round(
117
152
  chance.normal({ mean: avgEvPerUser, dev: avgEvPerUser / u.integer(3, 7) })
118
153
  );
119
-
154
+
120
155
  if (firstEvents.length) {
121
- eventData.push(
156
+ eventData.hPush(
122
157
  makeEvent(
123
158
  distinct_id,
124
159
  anonymousIds,
@@ -134,7 +169,7 @@ async function main(config) {
134
169
 
135
170
  //event loop
136
171
  for (let j = 0; j < numEventsThisUser; j++) {
137
- eventData.push(
172
+ eventData.hPush(
138
173
  makeEvent(
139
174
  distinct_id,
140
175
  anonymousIds,
@@ -149,7 +184,7 @@ async function main(config) {
149
184
  }
150
185
 
151
186
  //flatten SCD
152
- scdTableData = scdTableData.flat();
187
+ scdTableData.forEach((table, index) => scdTableData[index] = table.flat());
153
188
 
154
189
  log("\n");
155
190
 
@@ -163,11 +198,11 @@ async function main(config) {
163
198
  const group = {
164
199
  [groupKey]: i,
165
200
  ...makeProfile(groupProps[groupKey]),
166
- $distinct_id: i,
201
+ // $distinct_id: i,
167
202
  };
168
203
  groupProfiles.push(group);
169
204
  }
170
- groupProfilesData.push({ key: groupKey, data: groupProfiles });
205
+ groupProfilesData.hPush({ key: groupKey, data: groupProfiles });
171
206
  }
172
207
  log("\n");
173
208
 
@@ -183,27 +218,58 @@ async function main(config) {
183
218
  };
184
219
  data.push(item);
185
220
  }
186
- lookupTableData.push({ key, data });
221
+ lookupTableData.hPush({ key, data });
222
+ }
223
+
224
+ // deal with mirror props
225
+ let mirrorEventData = [];
226
+ const mirrorPropKeys = Object.keys(mirrorProps);
227
+ if (mirrorPropKeys.length) {
228
+ mirrorEventData = clone(eventData);
229
+ for (const row of mirrorEventData) {
230
+ for (const key of mirrorPropKeys) {
231
+ if (mirrorProps[key]?.events?.includes(row?.event)) row[key] = hook(u.choose(mirrorProps[key]?.values), "mirror", { config, row, key });
232
+ if (mirrorProps[key]?.events === "*") row[key] = hook(u.choose(mirrorProps[key]?.values), "mirror", { config, row, key });
233
+ }
234
+ }
187
235
  }
188
- const { eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, folder } =
236
+
237
+ const { eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder } =
189
238
  buildFileNames(config);
190
239
  const pairs = [
191
- [eventFiles, eventData],
192
- [userFiles, userProfilesData],
240
+ [eventFiles, [eventData]],
241
+ [userFiles, [userProfilesData]],
193
242
  [scdFiles, scdTableData],
194
243
  [groupFiles, groupProfilesData],
195
244
  [lookupFiles, lookupTableData],
245
+ [mirrorFiles, [mirrorEventData]],
196
246
  ];
197
- log("\n")
247
+ log("\n");
198
248
  log(`---------------SIMULATION----------------`, "\n");
199
249
 
200
250
  if (!writeToDisk && !token) {
251
+ track('end simulation', {
252
+ runId,
253
+ seed,
254
+ numEvents,
255
+ numUsers,
256
+ numDays,
257
+ events,
258
+ anonIds,
259
+ sessionIds,
260
+ format,
261
+ token,
262
+ region,
263
+ writeToDisk,
264
+ isCLI
265
+ });
201
266
  return {
202
267
  eventData,
203
268
  userProfilesData,
204
269
  scdTableData,
205
270
  groupProfilesData,
206
271
  lookupTableData,
272
+ mirrorEventData,
207
273
  import: {},
208
274
  files: []
209
275
  };
@@ -211,32 +277,39 @@ async function main(config) {
211
277
  log(`-----------------WRITES------------------`, `\n\n`);
212
278
  //write the files
213
279
  if (writeToDisk) {
214
- if (verbose) log(`writing files... for ${config.simulationName}`);
215
- loopFiles: for (const pair of pairs) {
216
- const [paths, data] = pair;
280
+ if (verbose) log(`writing files... for ${simulationName}`);
281
+ loopFiles: for (const ENTITY of pairs) {
282
+ const [paths, data] = ENTITY;
217
283
  if (!data.length) continue loopFiles;
218
- for (const path of paths) {
219
- let datasetsToWrite;
220
- if (data?.[0]?.["key"]) datasetsToWrite = data.map((d) => d.data);
221
- else datasetsToWrite = [data];
222
- for (const writeData of datasetsToWrite) {
223
- //if it's a lookup table, it's always a CSV
224
- if (format === "csv" || path.includes("-LOOKUP.csv")) {
225
- log(`\twriting ${path}`);
226
- const columns = u.getUniqueKeys(writeData);
227
- //papa parse needs nested JSON stringified
228
- writeData.forEach((e) => {
229
- for (const key in e) {
230
- if (typeof e[key] === "object") e[key] = JSON.stringify(e[key]);
231
- }
232
- });
233
- const csv = Papa.unparse(writeData, { columns });
234
- await touch(path, csv);
235
- } else {
236
- const ndjson = data.map((d) => JSON.stringify(d)).join("\n");
237
- await touch(path, ndjson, false);
238
- }
284
+ for (const [index, path] of paths.entries()) {
285
+ let TABLE;
286
+ //group + lookup tables are structured differently
287
+ if (data?.[index]?.["key"]) {
288
+ TABLE = data[index].data;
289
+ }
290
+ else {
291
+ TABLE = data[index];
292
+ }
293
+
294
+ log(`\twriting ${path}`);
295
+ //if it's a lookup table, it's always a CSV
296
+ if (format === "csv" || path.includes("-LOOKUP.csv")) {
297
+ const columns = u.getUniqueKeys(TABLE);
298
+ //papa parse needs eac nested field JSON stringified
299
+ TABLE.forEach((e) => {
300
+ for (const key in e) {
301
+ if (typeof e[key] === "object") e[key] = JSON.stringify(e[key]);
302
+ }
303
+ });
304
+
305
+ const csv = Papa.unparse(TABLE, { columns });
306
+ await touch(path, csv);
307
+ }
308
+ else {
309
+ const ndjson = TABLE.map((d) => JSON.stringify(d)).join("\n");
310
+ await touch(path, ndjson, false);
239
311
  }
312
+
240
313
  }
241
314
  }
242
315
  }
@@ -297,9 +370,24 @@ async function main(config) {
297
370
 
298
371
  }
299
372
  log(`\n-----------------WRITES------------------`, "\n");
373
+ track('end simulation', {
374
+ runId,
375
+ seed,
376
+ numEvents,
377
+ numUsers,
378
+ numDays,
379
+ events,
380
+ anonIds,
381
+ sessionIds,
382
+ format,
383
+ token,
384
+ region,
385
+ writeToDisk,
386
+ isCLI
387
+ });
300
388
  return {
301
389
  import: importResults,
302
- files: [eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, folder],
390
+ files: [eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder],
303
391
  };
304
392
  }
305
393
 
@@ -322,16 +410,23 @@ function makeProfile(props, defaults) {
322
410
 
323
411
  return profile;
324
412
  }
325
-
326
- function makeSCD(props, distinct_id, mutations, $created) {
327
- if (JSON.stringify(props) === "{}") return [];
413
+ /**
414
+ * @param {import('./types.d.ts').valueValid} prop
415
+ * @param {string} scdKey
416
+ * @param {string} distinct_id
417
+ * @param {number} mutations
418
+ * @param {string} $created
419
+ */
420
+ function makeSCD(prop, scdKey, distinct_id, mutations, $created) {
421
+ if (JSON.stringify(prop) === "{}") return {};
422
+ if (JSON.stringify(prop) === "[]") return [];
328
423
  const scdEntries = [];
329
424
  let lastInserted = dayjs($created);
330
425
  const deltaDays = dayjs().diff(lastInserted, "day");
331
426
 
332
427
  for (let i = 0; i < mutations; i++) {
333
428
  if (lastInserted.isAfter(dayjs())) break;
334
- const scd = makeProfile(props, { distinct_id });
429
+ const scd = makeProfile({ [scdKey]: prop }, { distinct_id });
335
430
  scd.startTime = lastInserted.toISOString();
336
431
  lastInserted = lastInserted.add(u.integer(1, 1000), "seconds");
337
432
  scd.insertTime = lastInserted.toISOString();
@@ -420,12 +515,21 @@ function buildFileNames(config) {
420
515
  const writePaths = {
421
516
  eventFiles: [path.join(writeDir, `${simName}-EVENTS.${extension}`)],
422
517
  userFiles: [path.join(writeDir, `${simName}-USERS.${extension}`)],
423
- scdFiles: [path.join(writeDir, `${simName}-SCD.${extension}`)],
518
+ scdFiles: [],
519
+ mirrorFiles: [path.join(writeDir, `${simName}-EVENTS-FUTURE-MIRROR.${extension}`)],
424
520
  groupFiles: [],
425
521
  lookupFiles: [],
426
522
  folder: writeDir,
427
523
  };
428
524
 
525
+ //add SCD files
526
+ const scdKeys = Object.keys(config?.scdProps || {});
527
+ for (const key of scdKeys) {
528
+ writePaths.scdFiles.push(
529
+ path.join(writeDir, `${simName}-${key}-SCD.${extension}`)
530
+ );
531
+ }
532
+
429
533
  for (const groupPair of groupKeys) {
430
534
  const groupKey = groupPair[0];
431
535
  writePaths.groupFiles.push(
@@ -445,10 +549,23 @@ function buildFileNames(config) {
445
549
  }
446
550
 
447
551
 
552
/**
 * Attach an `hPush` method to `arr` that passes each item through `hook`
 * and pushes whatever the hook returns.
 *
 * `hPush` is defined as a non-enumerable property so it does not appear in
 * `Object.keys(arr)`, `for...in`, or enumerable-property comparisons of the
 * array. It stays writable and configurable, so enriching the same array
 * again simply replaces the method.
 *
 * @param {Array} [arr=[]] - array to enrich; mutated in place
 * @param {Object} [opts={}]
 * @param {Function} [opts.hook] - called as `hook(item, type, meta)`; defaults to identity
 * @param {string} [opts.type=""] - passed to the hook as its second argument
 * Any other keys in `opts` are collected into one object and passed to the
 * hook as its third argument.
 * @returns {Array} the same `arr` instance, now carrying `hPush`
 */
function enrichArray(arr = [], opts = {}) {
	const { hook = (a) => a, type = "", ...meta } = opts;

	Object.defineProperty(arr, "hPush", {
		// returns the new length, exactly like Array.prototype.push
		value: function transformThenPush(item) {
			return arr.push(hook(item, type, meta));
		},
		enumerable: false,
		writable: true,
		configurable: true,
	});

	return arr;
}
563
+
448
564
 
449
565
 
450
566
  // this is for CLI
451
567
  if (require.main === module) {
568
+ isCLI = true;
452
569
  const args = cliParams();
453
570
  const { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false } = args;
454
571
  const suppliedConfig = args._[0];
@@ -489,8 +606,8 @@ if (require.main === module) {
489
606
  main(config)
490
607
  .then((data) => {
491
608
  log(`-----------------SUMMARY-----------------`);
492
- const d = {success: 0, bytes: 0};
493
- const darr = [d]
609
+ const d = { success: 0, bytes: 0 };
610
+ const darr = [d];
494
611
  const { events = d, groups = darr, users = d } = data.import;
495
612
  const files = data.files;
496
613
  const folder = files?.pop();
@@ -530,5 +647,13 @@ if (require.main === module) {
530
647
 
531
648
 
532
649
/**
 * Print to the console only when the module-level `VERBOSE` flag is truthy.
 *
 * In every string argument, each occurrence of the current working directory
 * is replaced with "." so absolute paths print relative to where the process
 * runs. Non-string arguments are passed to `console.log` untouched.
 *
 * @param {...any} args - values to print
 * @returns {void}
 */
function log(...args) {
	if (!VERBOSE) return;

	const cwd = process.cwd();
	// split/join is a literal replacement: the path is never interpreted as a
	// regular expression, so characters such as "+", "(", "[" or "\" in the
	// directory name cannot break matching or throw.
	const printable = args.map((arg) =>
		typeof arg === "string" ? arg.split(cwd).join(".") : arg
	);

	console.log(...printable);
}