make-mp-data 1.2.25 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,18 @@
4
4
  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
5
  "version": "0.2.0",
6
6
  "configurations": [
7
+ {
8
+ "command": "npm run simple",
9
+ "name": "simple",
10
+ "request": "launch",
11
+ "type": "node-terminal"
12
+ },
13
+ {
14
+ "command": "npm run complex",
15
+ "name": "complex",
16
+ "request": "launch",
17
+ "type": "node-terminal"
18
+ },
7
19
  {
8
20
  "type": "node",
9
21
  "request": "launch",
@@ -15,5 +27,6 @@
15
27
  "internalConsoleOptions": "neverOpen",
16
28
  "args": ["--ignore", "./data/"]
17
29
  }
30
+
18
31
  ]
19
32
  }
package/README.md CHANGED
@@ -1,50 +1,103 @@
1
1
 
2
-
3
2
  # Make Mixpanel Data
4
- a quick and dirty CLI in node.js to generate fake data for mixpanel.
5
3
 
6
- ## tldr;
4
+ ## 🤖 What is this?
5
+
6
+ Generate fake Mixpanel data _quickly_ and _easily_ with a simple CLI in Node.js. **Create events, user profiles, group profiles, SCD data, mirror tables, and lookup tables**; basically everything you need for testing + demoing Mixpanel analysis features.
7
+
8
+ the CLI looks like this:
9
+
10
+ ![Generate Mixpanel data](https://aktunes.neocities.org/makeDataDemo.gif)
11
+
12
+ Under the hood, `make-mp-data` models data that adheres to [Mixpanel's data model](https://docs.mixpanel.com/docs/data-structure/concepts), giving you the tools you need for robust, realistic, field-ready test data.
13
+
14
+ ## 🚀 Quick Start
15
+
16
+ ### Basic Usage
17
+
18
+ Generate events and users, and write them to CSV files:
7
19
 
8
-
9
- ```bash
20
+ ```bash
10
21
  npx make-mp-data
11
22
  ```
12
- - makes events + users (and writes them to CSVs)
13
23
 
14
- ```bash
15
- npx make-mp-data --numUsers 100 --numEvents 10000 --days 90 --format json
24
+ ### Customizing Output
25
+
26
+ Generate 10,000 events and 100 users over the last 90 days, and write the output as JSON:
27
+
28
+ ```bash
29
+ npx make-mp-data --numUsers 100 --numEvents 10000 --numDays 90 --format json
16
30
  ```
17
- - makes ~10k events + 100 users from the last 90 days (but writes JSON)
18
31
 
32
+ ### Complex Data Models
19
33
 
20
- ```bash
34
+ Create a comprehensive dataset including events, users, groups, SCD, and lookup tables:
35
+
36
+ ```bash
21
37
  npx make-mp-data --complex
22
38
  ```
23
- - makes events + users + groups + scd + lookup tables
24
- - this includes every type of data that mixpanel supports
39
+
40
+ ### Send Data to Mixpanel
41
+
42
+ Generate and send data directly to Mixpanel using your project token:
25
43
 
26
44
  ```bash
27
45
  npx make-mp-data --token 1234
28
46
  ```
29
- - makes events + users (and send them to mixpanel)
47
+
48
+ ### Help and Options
49
+
50
+ Need more info on available options?
30
51
 
31
52
  ```bash
32
53
  npx make-mp-data --help
33
54
  ```
34
- - explains all the options you can specify
35
55
 
36
- ## customization
56
+ ## 🔧 Schema
57
+
58
+ To choose your own event and property names and values, specify a custom data model and additional options:
37
59
 
38
60
  ```bash
39
61
  npx make-mp-data [dataModel.js] [options]
40
62
  ```
41
- ex.
63
+
64
+ Example:
42
65
 
43
66
  ```bash
44
67
  npx make-mp-data ecommSpec.js --token 1234 --numDays 30 --numUsers 1000 --numEvents 1000000
45
68
  ```
46
69
 
47
- see `--help` for a full list of options
70
+ ### Data Models
71
+
72
+ Check out `./models/` for example `dataModel.js` files to get started quickly.
73
+
74
+ ## 🛠️ CLI Options
75
+
76
+ Here's a breakdown of the CLI options you can use with `make-mp-data`:
77
+
78
+ - `--numUsers`: Number of users to generate.
79
+ - `--numEvents`: Number of events to generate.
80
+ - `--numDays`: Number of days over which to spread the generated data.
81
+ - `--format`: Output format (`csv` or `json`).
82
+ - `--token`: Mixpanel project token for direct data import.
83
+ - `--region`: Mixpanel data region (`US`, `EU`).
84
+ - `--writeToDisk`: Whether to write the data to disk (`true` or `false`).
85
+ - `--verbose`: Enable verbose logging.
86
+ - `--complex`: Create a complex set of models including groups, SCD, and lookup tables.
87
+ - `--simple`: Create a simple dataset including events and users.
88
+
89
+ ## 📄 Examples
90
+
91
+ Check out the `./models/` directory for sample data models:
92
+
93
+ ```bash
94
+ ls ./models/
95
+ ```
96
+
97
+ These models provide a great starting point for creating your own custom data generation scripts.
98
+
99
+ ## 🤝 Contributing
48
100
 
49
- see `./models/` for a few `dataModel.js` examples...
101
+ Feel free to fork this repository and submit pull requests. Contributions are always welcome!
50
102
 
103
+ For any issues or feature requests, please create an issue on the [GitHub repository](https://github.com/ak--47/make-mp-data/issues).
package/index.js CHANGED
@@ -1,6 +1,5 @@
1
1
  #! /usr/bin/env node
2
2
 
3
-
4
3
  /*
5
4
  make fake mixpanel data easily!
6
5
  by AK
@@ -20,9 +19,17 @@ const dayjs = require("dayjs");
20
19
  const utc = require("dayjs/plugin/utc");
21
20
  dayjs.extend(utc);
22
21
  const cliParams = require("./cli.js");
23
- const { makeName, md5 } = require('ak-tools');
22
+ const { makeName, md5, clone, tracker, uid } = require('ak-tools');
24
23
  const NOW = dayjs().unix();
25
24
  let VERBOSE = false;
25
+ let isCLI = false;
26
+ const { version } = require('./package.json');
27
+
28
+ const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41");
29
+ function track(name, props, ...rest) {
30
+ if (process.env.NODE_ENV === 'test') return;
31
+ metrics(name, props, ...rest);
32
+ }
26
33
 
27
34
  /** @typedef {import('./types.d.ts').Config} Config */
28
35
  /** @typedef {import('./types.d.ts').EventConfig} EventConfig */
@@ -43,12 +50,13 @@ async function main(config) {
43
50
  favoriteColor: ["red", "green", "blue", "yellow"],
44
51
  spiritAnimal: chance.animal.bind(chance),
45
52
  },
46
- scdProps = { NPS: u.weightedRange(0, 10, 150, 1.6) },
53
+ scdProps = {},
54
+ mirrorProps = {},
47
55
  groupKeys = [],
48
56
  groupProps = {},
49
57
  lookupTables = [],
50
- anonIds = true,
51
- sessionIds = true,
58
+ anonIds = false,
59
+ sessionIds = false,
52
60
  format = "csv",
53
61
  token = null,
54
62
  region = "US",
@@ -58,10 +66,28 @@ async function main(config) {
58
66
  } = config;
59
67
  VERBOSE = verbose;
60
68
  config.simulationName = makeName();
69
+ const { simulationName } = config;
61
70
  global.MP_SIMULATION_CONFIG = config;
62
71
  const uuidChance = new Chance(seed);
72
+ const runId = uid(42);
73
+ track('start simulation', {
74
+ runId,
75
+ seed,
76
+ numEvents,
77
+ numUsers,
78
+ numDays,
79
+ events,
80
+ anonIds,
81
+ sessionIds,
82
+ format,
83
+ token,
84
+ region,
85
+ writeToDisk,
86
+ isCLI,
87
+ version
88
+ });
63
89
  log(`------------------SETUP------------------`);
64
- log(`\nyour data simulation will heretofore be known as: \n\n\t${config.simulationName.toUpperCase()}...\n`);
90
+ log(`\nyour data simulation will heretofore be known as: \n\n\t${simulationName.toUpperCase()}...\n`);
65
91
  log(`and your configuration is:\n\n`, JSON.stringify({ seed, numEvents, numUsers, numDays, format, token, region, writeToDisk }, null, 2));
66
92
  log(`------------------SETUP------------------`, "\n");
67
93
 
@@ -100,7 +126,12 @@ async function main(config) {
100
126
  const firstEvents = events.filter((e) => e.isFirstEvent);
101
127
  const eventData = enrichArray([], { hook, type: "event", config });
102
128
  const userProfilesData = enrichArray([], { hook, type: "user", config });
103
- let scdTableData = enrichArray([], { hook, type: "scd", config });
129
+ const scdTableKeys = Object.keys(scdProps);
130
+ const scdTableData = [];
131
+ for (const [index, key] of scdTableKeys.entries()) {
132
+ scdTableData[index] = enrichArray([], { hook, type: "scd", config, scdKey: key });
133
+ }
134
+ // const scdTableData = enrichArray([], { hook, type: "scd", config });
104
135
  const groupProfilesData = enrichArray([], { hook, type: "groups", config });
105
136
  const lookupTableData = enrichArray([], { hook, type: "lookups", config });
106
137
  const avgEvPerUser = Math.floor(numEvents / numUsers);
@@ -112,8 +143,13 @@ async function main(config) {
112
143
  const user = generateUser();
113
144
  const { distinct_id, $created, anonymousIds, sessionIds } = user;
114
145
  userProfilesData.hPush(makeProfile(userProps, user));
115
- const mutations = chance.integer({ min: 1, max: 10 });
116
- scdTableData.hPush(makeSCD(scdProps, distinct_id, mutations, $created));
146
+
147
+ //scd loop
148
+ for (const [index, key] of scdTableKeys.entries()) {
149
+ const mutations = chance.integer({ min: 1, max: 10 });
150
+ scdTableData[index].hPush(makeSCD(scdProps[key], key, distinct_id, mutations, $created));
151
+ }
152
+
117
153
  const numEventsThisUser = Math.round(
118
154
  chance.normal({ mean: avgEvPerUser, dev: avgEvPerUser / u.integer(3, 7) })
119
155
  );
@@ -150,7 +186,7 @@ async function main(config) {
150
186
  }
151
187
 
152
188
  //flatten SCD
153
- scdTableData = scdTableData.flat();
189
+ scdTableData.forEach((table, index) => scdTableData[index] = table.flat());
154
190
 
155
191
  log("\n");
156
192
 
@@ -186,25 +222,56 @@ async function main(config) {
186
222
  }
187
223
  lookupTableData.hPush({ key, data });
188
224
  }
189
- const { eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, folder } =
225
+
226
+ // deal with mirror props
227
+ let mirrorEventData = [];
228
+ const mirrorPropKeys = Object.keys(mirrorProps);
229
+ if (mirrorPropKeys.length) {
230
+ mirrorEventData = clone(eventData);
231
+ for (const row of mirrorEventData) {
232
+ for (const key of mirrorPropKeys) {
233
+ if (mirrorProps[key]?.events?.includes(row?.event)) row[key] = hook(u.choose(mirrorProps[key]?.values), "mirror", { config, row, key });
234
+ if (mirrorProps[key]?.events === "*") row[key] = hook(u.choose(mirrorProps[key]?.values), "mirror", { config, row, key });
235
+ }
236
+ }
237
+ }
238
+
239
+ const { eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder } =
190
240
  buildFileNames(config);
191
241
  const pairs = [
192
- [eventFiles, eventData],
193
- [userFiles, userProfilesData],
242
+ [eventFiles, [eventData]],
243
+ [userFiles, [userProfilesData]],
194
244
  [scdFiles, scdTableData],
195
245
  [groupFiles, groupProfilesData],
196
246
  [lookupFiles, lookupTableData],
247
+ [mirrorFiles, [mirrorEventData]],
197
248
  ];
198
249
  log("\n");
199
250
  log(`---------------SIMULATION----------------`, "\n");
200
251
 
201
252
  if (!writeToDisk && !token) {
253
+ track('end simulation', {
254
+ runId,
255
+ seed,
256
+ numEvents,
257
+ numUsers,
258
+ numDays,
259
+ events,
260
+ anonIds,
261
+ sessionIds,
262
+ format,
263
+ token,
264
+ region,
265
+ writeToDisk,
266
+ isCLI
267
+ });
202
268
  return {
203
269
  eventData,
204
270
  userProfilesData,
205
271
  scdTableData,
206
272
  groupProfilesData,
207
273
  lookupTableData,
274
+ mirrorEventData,
208
275
  import: {},
209
276
  files: []
210
277
  };
@@ -212,32 +279,39 @@ async function main(config) {
212
279
  log(`-----------------WRITES------------------`, `\n\n`);
213
280
  //write the files
214
281
  if (writeToDisk) {
215
- if (verbose) log(`writing files... for ${config.simulationName}`);
216
- loopFiles: for (const pair of pairs) {
217
- const [paths, data] = pair;
282
+ if (verbose) log(`writing files... for ${simulationName}`);
283
+ loopFiles: for (const ENTITY of pairs) {
284
+ const [paths, data] = ENTITY;
218
285
  if (!data.length) continue loopFiles;
219
- for (const path of paths) {
220
- let datasetsToWrite;
221
- if (data?.[0]?.["key"]) datasetsToWrite = data.map((d) => d.data);
222
- else datasetsToWrite = [data];
223
- for (const writeData of datasetsToWrite) {
224
- //if it's a lookup table, it's always a CSV
225
- if (format === "csv" || path.includes("-LOOKUP.csv")) {
226
- log(`\twriting ${path}`);
227
- const columns = u.getUniqueKeys(writeData);
228
- //papa parse needs nested JSON stringified
229
- writeData.forEach((e) => {
230
- for (const key in e) {
231
- if (typeof e[key] === "object") e[key] = JSON.stringify(e[key]);
232
- }
233
- });
234
- const csv = Papa.unparse(writeData, { columns });
235
- await touch(path, csv);
236
- } else {
237
- const ndjson = data.map((d) => JSON.stringify(d)).join("\n");
238
- await touch(path, ndjson, false);
239
- }
286
+ for (const [index, path] of paths.entries()) {
287
+ let TABLE;
288
+ //group + lookup tables are structured differently
289
+ if (data?.[index]?.["key"]) {
290
+ TABLE = data[index].data;
291
+ }
292
+ else {
293
+ TABLE = data[index];
240
294
  }
295
+
296
+ log(`\twriting ${path}`);
297
+ //if it's a lookup table, it's always a CSV
298
+ if (format === "csv" || path.includes("-LOOKUP.csv")) {
299
+ const columns = u.getUniqueKeys(TABLE);
300
+ //papa parse needs each nested field JSON stringified
301
+ TABLE.forEach((e) => {
302
+ for (const key in e) {
303
+ if (typeof e[key] === "object") e[key] = JSON.stringify(e[key]);
304
+ }
305
+ });
306
+
307
+ const csv = Papa.unparse(TABLE, { columns });
308
+ await touch(path, csv);
309
+ }
310
+ else {
311
+ const ndjson = TABLE.map((d) => JSON.stringify(d)).join("\n");
312
+ await touch(path, ndjson, false);
313
+ }
314
+
241
315
  }
242
316
  }
243
317
  }
@@ -298,9 +372,25 @@ async function main(config) {
298
372
 
299
373
  }
300
374
  log(`\n-----------------WRITES------------------`, "\n");
375
+ track('end simulation', {
376
+ runId,
377
+ seed,
378
+ numEvents,
379
+ numUsers,
380
+ numDays,
381
+ events,
382
+ anonIds,
383
+ sessionIds,
384
+ format,
385
+ token,
386
+ region,
387
+ writeToDisk,
388
+ isCLI,
389
+ version
390
+ });
301
391
  return {
302
392
  import: importResults,
303
- files: [eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, folder],
393
+ files: [eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder],
304
394
  };
305
395
  }
306
396
 
@@ -323,16 +413,23 @@ function makeProfile(props, defaults) {
323
413
 
324
414
  return profile;
325
415
  }
326
-
327
- function makeSCD(props, distinct_id, mutations, $created) {
328
- if (JSON.stringify(props) === "{}") return [];
416
+ /**
417
+ * @param {import('./types.d.ts').valueValid} prop
418
+ * @param {string} scdKey
419
+ * @param {string} distinct_id
420
+ * @param {number} mutations
421
+ * @param {string} $created
422
+ */
423
+ function makeSCD(prop, scdKey, distinct_id, mutations, $created) {
424
+ if (JSON.stringify(prop) === "{}") return {};
425
+ if (JSON.stringify(prop) === "[]") return [];
329
426
  const scdEntries = [];
330
427
  let lastInserted = dayjs($created);
331
428
  const deltaDays = dayjs().diff(lastInserted, "day");
332
429
 
333
430
  for (let i = 0; i < mutations; i++) {
334
431
  if (lastInserted.isAfter(dayjs())) break;
335
- const scd = makeProfile(props, { distinct_id });
432
+ const scd = makeProfile({ [scdKey]: prop }, { distinct_id });
336
433
  scd.startTime = lastInserted.toISOString();
337
434
  lastInserted = lastInserted.add(u.integer(1, 1000), "seconds");
338
435
  scd.insertTime = lastInserted.toISOString();
@@ -421,12 +518,21 @@ function buildFileNames(config) {
421
518
  const writePaths = {
422
519
  eventFiles: [path.join(writeDir, `${simName}-EVENTS.${extension}`)],
423
520
  userFiles: [path.join(writeDir, `${simName}-USERS.${extension}`)],
424
- scdFiles: [path.join(writeDir, `${simName}-SCD.${extension}`)],
521
+ scdFiles: [],
522
+ mirrorFiles: [path.join(writeDir, `${simName}-EVENTS-FUTURE-MIRROR.${extension}`)],
425
523
  groupFiles: [],
426
524
  lookupFiles: [],
427
525
  folder: writeDir,
428
526
  };
429
527
 
528
+ //add SCD files
529
+ const scdKeys = Object.keys(config?.scdProps || {});
530
+ for (const key of scdKeys) {
531
+ writePaths.scdFiles.push(
532
+ path.join(writeDir, `${simName}-${key}-SCD.${extension}`)
533
+ );
534
+ }
535
+
430
536
  for (const groupPair of groupKeys) {
431
537
  const groupKey = groupPair[0];
432
538
  writePaths.groupFiles.push(
@@ -447,10 +553,10 @@ function buildFileNames(config) {
447
553
 
448
554
 
449
555
  function enrichArray(arr = [], opts = {}) {
450
- const { hook = a => a, type = "", config = {} } = opts;
556
+ const { hook = a => a, type = "", ...rest } = opts;
451
557
 
452
558
  function transformThenPush(item) {
453
- return arr.push(hook(item, type, config));
559
+ return arr.push(hook(item, type, rest));
454
560
  }
455
561
 
456
562
  arr.hPush = transformThenPush;
@@ -462,6 +568,7 @@ function enrichArray(arr = [], opts = {}) {
462
568
 
463
569
  // this is for CLI
464
570
  if (require.main === module) {
571
+ isCLI = true;
465
572
  const args = cliParams();
466
573
  const { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false } = args;
467
574
  const suppliedConfig = args._[0];
@@ -543,5 +650,13 @@ if (require.main === module) {
543
650
 
544
651
 
545
652
  function log(...args) {
653
+ const cwd = process.cwd(); // Get the current working directory
654
+
655
+ for (let i = 0; i < args.length; i++) {
656
+ // Replace occurrences of the current working directory with "./" in string arguments
657
+ if (typeof args[i] === 'string') {
658
+ args[i] = args[i].replace(new RegExp(cwd, 'g'), ".");
659
+ }
660
+ }
546
661
  if (VERBOSE) console.log(...args);
547
662
  }
package/models/complex.js CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * This is the default configuration file for the data generator
2
+ * This is the default configuration file for the data generator in COMPLEX mode
3
3
  * notice how the config object is structured, and see it's type definition in ./types.d.ts
4
4
  * feel free to modify this file to customize the data you generate
5
5
  * see helper functions in utils.js for more ways to generate data
@@ -59,7 +59,7 @@ const config = {
59
59
  watchTimeSec: weightedRange(10, 600, 1000, .25),
60
60
  quality: ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"],
61
61
  format: ["mp4", "avi", "mov", "mpg"],
62
- uploader_id: chance.guid.bind(chance)
62
+ video_id: weightedRange(1, 50000, 420000, 1.4),
63
63
 
64
64
  }
65
65
  },
@@ -120,12 +120,22 @@ const config = {
120
120
 
121
121
  },
122
122
 
123
+ /** each key generates its own table */
123
124
  scdProps: {
124
125
  plan: ["free", "free", "free", "free", "basic", "basic", "basic", "premium", "premium", "enterprise"],
125
126
  MRR: weightedRange(0, 10000, 1000, .15),
126
127
  NPS: weightedRange(0, 10, 150, 2),
127
- marketingOptIn: [true, true, false],
128
- dateOfRenewal: date(100, false),
128
+ subscribed: [true, true, true, true, true, true, false, false, false, false, "it's complicated"],
129
+ renewalDate: date(100, false),
130
+ },
131
+
132
+ mirrorProps: {
133
+ isBot: { events: "*", values: [false, false, false, false, true] },
134
+ profit: { events: ["checkout"], values: [4, 2, 42, 420] },
135
+ watchTimeSec: {
136
+ events: ["watch video"],
137
+ values: weightedRange(50, 1200, 247, 6)
138
+ }
129
139
  },
130
140
 
131
141
  /*
@@ -133,7 +143,8 @@ const config = {
133
143
  each pair represents a group_key and the number of profiles for that key
134
144
  */
135
145
  groupKeys: [
136
- ['company_id', 350],
146
+ ['company_id', 500],
147
+ ['room_id', 10000],
137
148
 
138
149
  ],
139
150
  groupProps: {
@@ -144,6 +155,15 @@ const config = {
144
155
  "industry": ["tech", "finance", "healthcare", "education", "government", "non-profit"],
145
156
  "segment": ["enterprise", "SMB", "mid-market"],
146
157
  "products": [["core"], ["core"], ["core", "add-ons"], ["core", "pro-serve"], ["core", "add-ons", "pro-serve"], ["core", "BAA", "enterprise"], ["free"], ["free"], ["free", "addons"]],
158
+ },
159
+ room_id: {
160
+ $name: () => { return `#${chance.word({ length: integer(4, 24), capitalize: true })}`; },
161
+ $email: ["public", "private"],
162
+ "room provider": ["partner", "core", "core", "core"],
163
+ "room capacity": weightedRange(3, 1000000),
164
+ "isPublic": [true, false, false, false, false],
165
+ "country": chance.country.bind(chance),
166
+ "isVerified": [true, true, false, false, false],
147
167
  }
148
168
  },
149
169
 
@@ -153,29 +173,7 @@ const config = {
153
173
  entries: 1000,
154
174
  attributes: {
155
175
  category: [
156
- "Books",
157
- "Movies",
158
- "Music",
159
- "Games",
160
- "Electronics",
161
- "Computers",
162
- "Smart Home",
163
- "Home",
164
- "Garden & Tools",
165
- "Pet Supplies",
166
- "Food & Grocery",
167
- "Beauty",
168
- "Health",
169
- "Toys",
170
- "Kids",
171
- "Baby",
172
- "Handmade",
173
- "Sports",
174
- "Outdoors",
175
- "Automotive",
176
- "Industrial",
177
- "Entertainment",
178
- "Art"
176
+ "Books", "Movies", "Music", "Games", "Electronics", "Computers", "Smart Home", "Home", "Garden & Tools", "Pet Supplies", "Food & Grocery", "Beauty", "Health", "Toys", "Kids", "Baby", "Handmade", "Sports", "Outdoors", "Automotive", "Industrial", "Entertainment", "Art"
179
177
  ],
180
178
  "demand": ["high", "medium", "medium", "low"],
181
179
  "supply": ["high", "medium", "medium", "low"],
@@ -185,6 +183,20 @@ const config = {
185
183
  "reviews": weightedRange(0, 35)
186
184
  }
187
185
 
186
+ },
187
+ {
188
+ key: "video_id",
189
+ entries: 50000,
190
+ attributes: {
191
+ isFlagged: [true, false, false, false, false],
192
+ copyright: ["all rights reserved", "creative commons", "creative commons", "public domain", "fair use"],
193
+ uploader_id: chance.guid.bind(chance),
194
+ "uploader influence": ["low", "low", "low", "medium", "medium", "high"],
195
+ rating: weightedRange(1, 5),
196
+ thumbs: weightedRange(0, 35),
197
+ rating: ["G", "PG", "PG-13", "R", "NC-17", "PG-13", "R", "NC-17", "R", "PG", "PG"]
198
+ }
199
+
188
200
  }
189
201
  ],
190
202
 
@@ -18,17 +18,17 @@ const config = {
18
18
  groupProperties: {}
19
19
  };
20
20
 
21
- let formats = ['2160p', '1440p', '1080p', '720p', '480p', '360p', '240p'];
22
- let ratios = ['4:3', '16:10', '16:9'];
23
- let containers = ["WEBM", ["MPG", "MP2", "MPEG"], ["MP4", "M4P", "M4V"], ["AVI", "WMV"], ["MOV", "QT"], ["FLV", "SWF"], "AVCHD"];
24
- let hashtags = ["#AK", "#bitcoin", "#cureForMiley", "#faceValue", "#blm", "#fwiw", "#inappropriateFuneralSongs", "#jurassicPork", "#lolCats", "#wheatForSheep", "#momTexts", "#myWeirdGymStory", "#poppy", "#resist", "#tbt", "#wilson", "#worstGiftEver", "#yolo", "#phish", "#crypto", "#memes", "#wrongMovie", "#careerEndingTwitterTypos", "#twoThingsThatDontMix"];
25
- let platforms = ["Web", "Mobile Web", "Native (Android)", "Native (iOS)", "Native (Desktop)", "IoT"];
26
- let plans = ['free', 'premium', 'casual', 'influencer'];
27
- let categories = ["Product reviews video", "How-to videos", "Vlogs", "Gaming videos", "Comedy/skit videos", "Haul videos", "Memes/tags", "Favorites/best of", "Educational videos", "Unboxing videos", "Q&A videos", "Collection", "Prank videos"];
28
- let marketingChannels = ["Organic", "Organic", "Organic", "Organic", "Instagram Ads", "Facebook Ads", "Google Ads", "Youtube Ads", "Instagram Post", "Instagram Post", "Facebook Post"];
29
-
30
21
 
31
22
  function generateVideoMeta() {
23
+ let formats = ['2160p', '1440p', '1080p', '720p', '480p', '360p', '240p'];
24
+ let ratios = ['4:3', '16:10', '16:9'];
25
+ let containers = ["WEBM", ["MPG", "MP2", "MPEG"], ["MP4", "M4P", "M4V"], ["AVI", "WMV"], ["MOV", "QT"], ["FLV", "SWF"], "AVCHD"];
26
+ let hashtags = ["#AK", "#bitcoin", "#cureForMiley", "#faceValue", "#blm", "#fwiw", "#inappropriateFuneralSongs", "#jurassicPork", "#lolCats", "#wheatForSheep", "#momTexts", "#myWeirdGymStory", "#poppy", "#resist", "#tbt", "#wilson", "#worstGiftEver", "#yolo", "#phish", "#crypto", "#memes", "#wrongMovie", "#careerEndingTwitterTypos", "#twoThingsThatDontMix"];
27
+ let platforms = ["Web", "Mobile Web", "Native (Android)", "Native (iOS)", "Native (Desktop)", "IoT"];
28
+ let plans = ['free', 'premium', 'casual', 'influencer'];
29
+ let categories = ["Product reviews video", "How-to videos", "Vlogs", "Gaming videos", "Comedy/skit videos", "Haul videos", "Memes/tags", "Favorites/best of", "Educational videos", "Unboxing videos", "Q&A videos", "Collection", "Prank videos"];
30
+ let marketingChannels = ["Organic", "Organic", "Organic", "Organic", "Instagram Ads", "Facebook Ads", "Google Ads", "Youtube Ads", "Instagram Post", "Instagram Post", "Facebook Post"];
31
+
32
32
 
33
33
  let videoTemplate = {
34
34
  videoFormatInfo: {
package/models/simple.js CHANGED
@@ -1,3 +1,13 @@
1
+ /**
2
+ * This is the default configuration file for the data generator in SIMPLE mode
3
+ * notice how the config object is structured, and see its type definition in ./types.d.ts
4
+ * feel free to modify this file to customize the data you generate
5
+ * see helper functions in utils.js for more ways to generate data
6
+ */
7
+
8
+
9
+
10
+
1
11
  const Chance = require('chance');
2
12
  const chance = new Chance();
3
13
  const dayjs = require("dayjs");
@@ -24,9 +34,9 @@ const config = {
24
34
 
25
35
  events: [
26
36
  {
27
- "event": "checkout",
28
- "weight": 2,
29
- "properties": {
37
+ event: "checkout",
38
+ weight: 2,
39
+ properties: {
30
40
  amount: weightedRange(5, 500, 1000, .25),
31
41
  currency: ["USD", "CAD", "EUR", "BTC", "ETH", "JPY"],
32
42
  coupon: ["none", "none", "none", "none", "10%OFF", "20%OFF", "10%OFF", "20%OFF", "30%OFF", "40%OFF", "50%OFF"],
@@ -35,9 +45,9 @@ const config = {
35
45
  }
36
46
  },
37
47
  {
38
- "event": "add to cart",
39
- "weight": 4,
40
- "properties": {
48
+ event: "add to cart",
49
+ weight: 4,
50
+ properties: {
41
51
  amount: weightedRange(5, 500, 1000, .25),
42
52
  rating: weightedRange(1, 5),
43
53
  reviews: weightedRange(0, 35),
@@ -48,17 +58,17 @@ const config = {
48
58
  }
49
59
  },
50
60
  {
51
- "event": "page view",
52
- "weight": 10,
53
- "properties": {
61
+ event: "page view",
62
+ weight: 10,
63
+ properties: {
54
64
  page: ["/", "/", "/help", "/account", "/watch", "/listen", "/product", "/people", "/peace"],
55
65
  utm_source: ["$organic", "$organic", "$organic", "$organic", "google", "google", "google", "facebook", "facebook", "twitter", "linkedin"],
56
66
  }
57
67
  },
58
68
  {
59
- "event": "watch video",
60
- "weight": 8,
61
- "properties": {
69
+ event: "watch video",
70
+ weight: 8,
71
+ properties: {
62
72
  videoCategory: weighList(videoCategories, integer(0, 9)),
63
73
  isFeaturedItem: [true, false, false],
64
74
  watchTimeSec: weightedRange(10, 600, 1000, .25),
@@ -69,9 +79,9 @@ const config = {
69
79
  }
70
80
  },
71
81
  {
72
- "event": "view item",
73
- "weight": 8,
74
- "properties": {
82
+ event: "view item",
83
+ weight: 8,
84
+ properties: {
75
85
  isFeaturedItem: [true, false, false],
76
86
  itemCategory: weighList(itemCategories, integer(0, 27)),
77
87
  dateItemListed: date(30, true, 'YYYY-MM-DD'),
@@ -79,9 +89,9 @@ const config = {
79
89
  }
80
90
  },
81
91
  {
82
- "event": "save item",
83
- "weight": 5,
84
- "properties": {
92
+ event: "save item",
93
+ weight: 5,
94
+ properties: {
85
95
  isFeaturedItem: [true, false, false],
86
96
  itemCategory: weighList(itemCategories, integer(0, 27)),
87
97
  dateItemListed: date(30, true, 'YYYY-MM-DD'),
@@ -89,10 +99,10 @@ const config = {
89
99
  }
90
100
  },
91
101
  {
92
- "event": "sign up",
93
- "isFirstEvent": true,
94
- "weight": 0,
95
- "properties": {
102
+ event: "sign up",
103
+ isFirstEvent: true,
104
+ weight: 0,
105
+ properties: {
96
106
  variants: ["A", "B", "C", "Control"],
97
107
  flows: ["new", "existing", "loyal", "churned"],
98
108
  flags: ["on", "off"],
@@ -118,6 +128,15 @@ const config = {
118
128
  },
119
129
 
120
130
  scdProps: {},
131
+ mirrorProps: {
132
+ isBot: { events: "*", values: [false, false, false, false, true] },
133
+ profit: { events: ["checkout"], values: [4, 2, 42, 420] },
134
+ watchTimeSec: {
135
+ events: ["watch video"],
136
+ values: weightedRange(50, 1200, 247, 6)
137
+ }
138
+
139
+ },
121
140
 
122
141
  /*
123
142
  for group analytics keys, we need an array of arrays [[],[],[]]
package/package.json CHANGED
@@ -1,17 +1,17 @@
1
1
  {
2
2
  "name": "make-mp-data",
3
- "version": "1.2.25",
3
+ "version": "1.3.01",
4
4
  "description": "builds all mixpanel primitives for a given project",
5
5
  "main": "index.js",
6
6
  "types": "types.d.ts",
7
7
  "scripts": {
8
8
  "start": "node index.js",
9
9
  "dev": "./scripts/go.sh",
10
- "complex": "nodemon index.js --complex",
11
- "simple": "nodemon index.js",
10
+ "complex": "nodemon index.js --complex --e 10000 --u 100",
11
+ "simple": "nodemon index.js --simple --e 10000 --u 100",
12
12
  "prune": "rm ./data/*",
13
13
  "post": "npm publish",
14
- "test": "jest --runInBand",
14
+ "test": "NODE_ENV=test jest --runInBand",
15
15
  "deps": "sh ./scripts/deps.sh"
16
16
  },
17
17
  "repository": {
@@ -40,7 +40,7 @@
40
40
  },
41
41
  "homepage": "https://github.com/ak--47/make-mp-data#readme",
42
42
  "dependencies": {
43
- "ak-tools": "^1.0.57",
43
+ "ak-tools": "^1.0.58",
44
44
  "chance": "^1.1.11",
45
45
  "dayjs": "^1.11.11",
46
46
  "mixpanel-import": "^2.5.51",
package/tests/e2e.test.js CHANGED
@@ -24,7 +24,7 @@ describe('module', () => {
24
24
  expect(eventData.length).toBeGreaterThan(980);
25
25
  expect(groupProfilesData.length).toBe(0);
26
26
  expect(lookupTableData.length).toBe(0);
27
- expect(scdTableData.length).toBeGreaterThan(200);
27
+ expect(scdTableData.length).toBe(0);
28
28
  expect(userProfilesData.length).toBe(100);
29
29
 
30
30
  }, timeout);
@@ -46,10 +46,10 @@ describe('module', () => {
46
46
  const results = await generate({ ...complex, verbose: true, writeToDisk: false, numEvents: 1100, numUsers: 100, seed: "deal with it" });
47
47
  const { eventData, groupProfilesData, lookupTableData, scdTableData, userProfilesData } = results;
48
48
  expect(eventData.length).toBeGreaterThan(980);
49
- expect(groupProfilesData[0]?.data?.length).toBe(350);
50
- expect(lookupTableData.length).toBe(1);
49
+ expect(groupProfilesData[0]?.data?.length).toBe(500);
50
+ expect(lookupTableData.length).toBe(2);
51
51
  expect(lookupTableData[0].data.length).toBe(1000);
52
- expect(scdTableData.length).toBeGreaterThan(200);
52
+ expect(scdTableData.length).toBe(5);
53
53
  expect(userProfilesData.length).toBe(100);
54
54
 
55
55
  }, timeout);
@@ -61,21 +61,30 @@ describe('module', () => {
61
61
  expect(eventData.length).toBeGreaterThan(980);
62
62
  expect(groupProfilesData.length).toBe(0);
63
63
  expect(lookupTableData.length).toBe(0);
64
- expect(scdTableData.length).toBeGreaterThan(200);
64
+ expect(scdTableData.length).toBe(0);
65
65
  expect(userProfilesData.length).toBe(100);
66
66
 
67
67
  }, timeout);
68
68
 
69
+ test('fails with invalid configuration', async () => {
70
+ try {
71
+ await generate({ numUsers: -10 });
72
+ } catch (e) {
73
+ expect(e).toBeDefined();
74
+ }
75
+ }, timeout);
76
+
77
+
69
78
 
70
79
  });
71
80
 
72
81
  describe('cli', () => {
73
82
  test('works as CLI (complex)', async () => {
74
83
  console.log('COMPLEX CLI TEST');
75
- const run = execSync(`node ./index.js --numEvents 1000 --numUsers 100 --seed "deal with it" --complex`);
76
- expect(run.toString().trim().includes('have a wonderful day :)')).toBe(true);
84
+ const run = execSync(`node ./index.js --numEvents 1000 --numUsers 100 --seed "deal with it" --complex`, { stdio: 'ignore' });
85
+ // expect(run.toString().trim().includes('have a wonderful day :)')).toBe(true);
77
86
  const csvs = (await u.ls('./data')).filter(a => a.includes('.csv'));
78
- expect(csvs.length).toBe(5);
87
+ expect(csvs.length).toBe(12);
79
88
  clearData();
80
89
  }, timeout);
81
90
 
@@ -84,7 +93,7 @@ describe('cli', () => {
84
93
  const run = execSync(`node ./index.js --numEvents 1000 --numUsers 100 --seed "deal with it"`);
85
94
  expect(run.toString().trim().includes('have a wonderful day :)')).toBe(true);
86
95
  const csvs = (await u.ls('./data')).filter(a => a.includes('.csv'));
87
- expect(csvs.length).toBe(2);
96
+ expect(csvs.length).toBe(3);
88
97
  clearData();
89
98
  }, timeout);
90
99
 
@@ -93,12 +102,13 @@ describe('cli', () => {
93
102
  const run = execSync(`node ./index.js ./models/deepNest.js`);
94
103
  expect(run.toString().trim().includes('have a wonderful day :)')).toBe(true);
95
104
  const csvs = (await u.ls('./data')).filter(a => a.includes('.csv'));
96
- expect(csvs.length).toBe(3);
105
+ expect(csvs.length).toBe(2);
97
106
  clearData();
98
107
  }, timeout);
99
108
 
100
109
  });
101
110
 
111
+
102
112
  describe('options + tweaks', () => {
103
113
  test('creates sessionIds', async () => {
104
114
  const results = await generate({ writeToDisk: false, numEvents: 1000, numUsers: 100, sessionIds: true });
@@ -60,7 +60,7 @@ describe('utils', () => {
60
60
  const generatedPerson = person();
61
61
  expect(generatedPerson).toHaveProperty('$name');
62
62
  expect(generatedPerson).toHaveProperty('$email');
63
- expect(generatedPerson).toHaveProperty('$avatar');
63
+ expect(generatedPerson).toHaveProperty('$avatar');
64
64
  });
65
65
 
66
66
 
@@ -96,6 +96,38 @@ describe('utils', () => {
96
96
  expect(result).toBe('test');
97
97
  });
98
98
 
99
+ test('choose: non-function / non-array', () => {
100
+ expect(choose('test')).toBe('test');
101
+ expect(choose(123)).toBe(123);
102
+ });
103
+
104
+ test('choose: nested functions', () => {
105
+ const result = choose(() => () => () => 'nested');
106
+ expect(result).toBe('nested');
107
+ });
108
+
109
+ test('weightedRange: within range', () => {
110
+ const values = weightedRange(5, 15, 100);
111
+ expect(values.every(v => v >= 5 && v <= 15)).toBe(true);
112
+ expect(values.length).toBe(100);
113
+ });
114
+
115
+ test('applySkew: skews', () => {
116
+ const value = boxMullerRandom();
117
+ const skewedValue = applySkew(value, .25);
118
+ expect(Math.abs(skewedValue)).toBeGreaterThanOrEqual(Math.abs(value));
119
+ });
120
+
121
+ test('mapToRange: works', () => {
122
+ const value = 0;
123
+ const mean = 10;
124
+ const sd = 5;
125
+ const mappedValue = mapToRange(value, mean, sd);
126
+ expect(mappedValue).toBe(10);
127
+ });
128
+
129
+
130
+
99
131
  test('exhaust: elements', () => {
100
132
  const arr = [1, 2, 3];
101
133
  const exhaustFn = exhaust([...arr]);
@@ -161,7 +193,7 @@ describe('utils', () => {
161
193
  expect(typeof emojis).toBe('string');
162
194
  const emojiArray = emojis.split(', ');
163
195
  expect(emojiArray.length).toBeLessThanOrEqual(10); // Assuming max default is 10
164
-
196
+
165
197
  });
166
198
 
167
199
 
package/types.d.ts CHANGED
@@ -1,82 +1,114 @@
1
1
  declare namespace main {
2
- type primitives = string | number | boolean | Date | Object;
3
- type valueValid =
4
- | primitives
5
- | primitives[]
6
- | (() => primitives | primitives[]);
7
-
8
- export interface Config {
9
- token?: string;
10
- seed?: string;
11
- numDays?: number;
12
- numEvents?: number;
13
- numUsers?: number;
14
- format?: "csv" | "json";
15
- region?: string;
16
- events?: EventConfig[];
17
- superProps?: Record<string, valueValid>;
18
- userProps?: Record<string, valueValid>;
19
- scdProps?: Record<string, valueValid>;
20
- groupKeys?: [string, number][];
21
- groupProps?: Record<string, GroupProperty>; // Adjust according to usage
22
- lookupTables?: LookupTable[];
23
- writeToDisk?: boolean;
24
- simulationName?: string;
25
- verbose?: boolean;
26
- anonIds?: boolean;
27
- sessionIds?: boolean;
28
- hook?: Hook;
2
+ type Primitives = string | number | boolean | Date | Record<string, any>;
3
+
4
+ // Recursive type to handle functions returning functions that eventually return Primitives or arrays of Primitives
5
+ type ValueValid =
6
+ | Primitives
7
+ | ValueValid[]
8
+ | (() => ValueValid);
9
+
10
+ // MAIN CONFIGURATION OBJECT
11
+ export interface Config {
12
+ token?: string;
13
+ seed?: string;
14
+ numDays?: number;
15
+ numEvents?: number;
16
+ numUsers?: number;
17
+ format?: "csv" | "json";
18
+ region?: string;
19
+ events?: EventConfig[];
20
+ superProps?: Record<string, ValueValid>;
21
+ userProps?: Record<string, ValueValid>;
22
+ scdProps?: Record<string, ValueValid>;
23
+ mirrorProps?: Record<string, MirrorProps>;
24
+ groupKeys?: [string, number][];
25
+ groupProps?: Record<string, Record<string, ValueValid>>;
26
+ lookupTables?: LookupTable[];
27
+ writeToDisk?: boolean;
28
+ simulationName?: string;
29
+ verbose?: boolean;
30
+ anonIds?: boolean;
31
+ sessionIds?: boolean;
32
+ hook?: Hook;
33
+ }
34
+
35
+ export type Hook = (record: any, type: string, meta: any) => any;
36
+
37
+ export interface EventConfig {
38
+ event?: string;
39
+ weight?: number;
40
+ properties?: Record<string, ValueValid>;
41
+ isFirstEvent?: boolean;
42
+ }
43
+
44
+ export interface MirrorProps {
45
+ events: string[] | "*";
46
+ values: ValueValid[];
47
+ }
48
+
49
+ export interface LookupTable {
50
+ key: string;
51
+ entries: number;
52
+ attributes: Record<string, ValueValid>;
53
+ }
54
+
55
+ export interface SCDTable {
56
+ distinct_id: string;
57
+ insertTime: string;
58
+ startTime: string;
59
+ [key: string]: ValueValid;
60
+ }
61
+
62
+ export type Result = {
63
+ eventData: EventData[];
64
+ userProfilesData: any[];
65
+ scdTableData: any[];
66
+ groupProfilesData: GroupProfilesData[];
67
+ lookupTableData: LookupTableData[];
68
+ import?: ImportResults;
69
+ files?: string[];
70
+ };
71
+
72
+ export interface EventData {
73
+ event: string;
74
+ $source: string;
75
+ time: string;
76
+ $device_id?: string;
77
+ $session_id?: string;
78
+ $user_id?: string;
79
+ [key: string]: any;
80
+ }
81
+
82
+ export interface GroupProfilesData {
83
+ key: string;
84
+ data: any[];
85
+ }
86
+
87
+ export interface LookupTableData {
88
+ key: string;
89
+ data: any[];
90
+ }
91
+
92
+ export interface ImportResults {
93
+ events: ImportResult;
94
+ users: ImportResult;
95
+ groups: ImportResult[];
96
+ }
97
+
98
+ export interface ImportResult {
99
+ success: number;
100
+ bytes: number;
101
+ }
29
102
  }
30
-
31
- export type Hook = (record: any, type: string, meta: any) => any;
32
-
33
- interface EventConfig {
34
- event?: string;
35
- weight?: number;
36
- properties?: {
37
- [key: string]: valueValid; // Consider refining based on actual properties used
38
- };
39
- isFirstEvent?: boolean;
40
- }
41
-
42
- interface GroupProperty {
43
- [key?: string]: valueValid;
44
- }
45
-
46
- interface LookupTable {
47
- key: string;
48
- entries: number;
49
- attributes: {
50
- [key?: string]: valueValid;
51
- };
52
- }
53
-
54
- type Result = {
55
- eventData: {
56
- event: any;
57
- $source: string;
58
- }[];
59
- userProfilesData: any[];
60
- scdTableData: any[];
61
- groupProfilesData: {
62
- key: string;
63
- data: any[];
64
- }[];
65
- lookupTableData: {
66
- key: string;
67
- data: any[];
68
- }[];
69
- import?: undefined;
70
- files?: undefined;
71
- };
72
- }
73
-
74
- /**
75
- * Mixpanel Data Generator
76
- * model events, users, groups, and lookup tables (and SCD props!)
77
- * @example
78
- * const gen = require('make-mp-data')
79
- * const dta = gen({writeToDisk: false})
80
- */
81
- declare function main(config: main.Config): Promise<main.Result>;
82
- export = main;
103
+
104
+ /**
105
+ * Mixpanel Data Generator
106
+ * model events, users, groups, and lookup tables (and SCD props!)
107
+ * @example
108
+ * const gen = require('make-mp-data')
109
+ * const dta = await gen({writeToDisk: false})
110
+ */
111
+ declare function main(config: main.Config): Promise<main.Result>;
112
+
113
+ export = main;
114
+
package/utils.js CHANGED
@@ -155,7 +155,7 @@ function mapToRange(value, mean, sd) {
155
155
  return Math.round(value * sd + mean);
156
156
  };
157
157
 
158
- function weightedRange(min, max, size = 100, skew = 1) {
158
+ function unOptimizedWeightedRange(min, max, size = 100, skew = 1) {
159
159
  const mean = (max + min) / 2;
160
160
  const sd = (max - min) / 4;
161
161
  let array = [];
@@ -176,6 +176,23 @@ function weightedRange(min, max, size = 100, skew = 1) {
176
176
  return array;
177
177
  };
178
178
 
179
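+ // optimized weighted range: keeps drawing skewed normal values, mapped via mean/sd, and keeps only those within [min, max] until `size` values are collected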
180
+ function weightedRange(min, max, size = 100, skew = 1) {
181
+ const mean = (max + min) / 2;
182
+ const sd = (max - min) / 4;
183
+ const array = [];
184
+ while (array.length < size) {
185
+ const normalValue = boxMullerRandom();
186
+ const skewedValue = applySkew(normalValue, skew);
187
+ const mappedValue = mapToRange(skewedValue, mean, sd);
188
+ if (mappedValue >= min && mappedValue <= max) {
189
+ array.push(mappedValue);
190
+ }
191
+ }
192
+ return array;
193
+ }
194
+
195
+
179
196
  function progress(thing, p) {
180
197
  readline.cursorTo(process.stdout, 0);
181
198
  process.stdout.write(`${thing} processed ... ${comma(p)}`);