make-mp-data 1.2.25 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,18 @@
4
4
  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
5
  "version": "0.2.0",
6
6
  "configurations": [
7
+ {
8
+ "command": "npm run simple",
9
+ "name": "simple",
10
+ "request": "launch",
11
+ "type": "node-terminal"
12
+ },
13
+ {
14
+ "command": "npm run complex",
15
+ "name": "complex",
16
+ "request": "launch",
17
+ "type": "node-terminal"
18
+ },
7
19
  {
8
20
  "type": "node",
9
21
  "request": "launch",
@@ -15,5 +27,6 @@
15
27
  "internalConsoleOptions": "neverOpen",
16
28
  "args": ["--ignore", "./data/"]
17
29
  }
30
+
18
31
  ]
19
32
  }
package/README.md CHANGED
@@ -1,50 +1,103 @@
1
1
 
2
-
3
2
  # Make Mixpanel Data
4
- a quick and dirty CLI in node.js to generate fake data for mixpanel.
5
3
 
6
- ## tldr;
4
+ ## 🤖 What is this?
5
+
6
+ Generate fake Mixpanel data _quickly_ and _easily_ with a simple CLI in Node.js. **Create events, user profiles, group profiles, SCD data, mirror tables, and lookup tables**; basically everything you need for testing + demoing Mixpanel analysis features.
7
+
8
+ The CLI looks like this:
9
+
10
+ ![Generate Mixpanel data](https://aktunes.neocities.org/makeDataDemo.gif)
11
+
12
+ Under the hood, `make-mp-data` models data that adheres to [Mixpanel's data model](https://docs.mixpanel.com/docs/data-structure/concepts), giving you the tools you need for robust, realistic, field-ready test data.
13
+
14
+ ## 🚀 Quick Start
15
+
16
+ ### Basic Usage
17
+
18
+ Generate events and users, and write them to CSV files:
7
19
 
8
-
9
- ```bash
20
+ ```bash
10
21
  npx make-mp-data
11
22
  ```
12
- - makes events + users (and writes them to CSVs)
13
23
 
14
- ```bash
15
- npx make-mp-data --numUsers 100 --numEvents 10000 --days 90 --format json
24
+ ### Customizing Output
25
+
26
+ Generate 10,000 events and 100 users over the last 90 days, and write the output as JSON:
27
+
28
+ ```bash
29
+ npx make-mp-data --numUsers 100 --numEvents 10000 --numDays 90 --format json
16
30
  ```
17
- - makes ~10k events + 100 users from the last 90 days (but writes JSON)
18
31
 
32
+ ### Complex Data Models
19
33
 
20
- ```bash
34
+ Create a comprehensive dataset including events, users, groups, SCD, and lookup tables:
35
+
36
+ ```bash
21
37
  npx make-mp-data --complex
22
38
  ```
23
- - makes events + users + groups + scd + lookup tables
24
- - this includes every type of data that mixpanel supports
39
+
40
+ ### Send Data to Mixpanel
41
+
42
+ Generate and send data directly to Mixpanel using your project token:
25
43
 
26
44
  ```bash
27
45
  npx make-mp-data --token 1234
28
46
  ```
29
- - makes events + users (and send them to mixpanel)
47
+
48
+ ### Help and Options
49
+
50
+ Need more info on available options?
30
51
 
31
52
  ```bash
32
53
  npx make-mp-data --help
33
54
  ```
34
- - explains all the options you can specify
35
55
 
36
- ## customization
56
+ ## 🔧 Schema
57
+
58
+ To choose your own event and property names and values, specify a custom data model and additional options:
37
59
 
38
60
  ```bash
39
61
  npx make-mp-data [dataModel.js] [options]
40
62
  ```
41
- ex.
63
+
64
+ Example:
42
65
 
43
66
  ```bash
44
67
  npx make-mp-data ecommSpec.js --token 1234 --numDays 30 --numUsers 1000 --numEvents 1000000
45
68
  ```
46
69
 
47
- see `--help` for a full list of options
70
+ ### Data Models
71
+
72
+ Check out `./models/` for example `dataModel.js` files to get started quickly.
73
+
74
+ ## 🛠️ CLI Options
75
+
76
+ Here's a breakdown of the CLI options you can use with `make-mp-data`:
77
+
78
+ - `--numUsers`: Number of users to generate.
79
+ - `--numEvents`: Number of events to generate.
80
+ - `--numDays`: Number of days over which to spread the generated data.
81
+ - `--format`: Output format (`csv` or `json`).
82
+ - `--token`: Mixpanel project token for direct data import.
83
+ - `--region`: Mixpanel data region (`US`, `EU`).
84
+ - `--writeToDisk`: Whether to write the data to disk (`true` or `false`).
85
+ - `--verbose`: Enable verbose logging.
86
+ - `--complex`: Create a complex set of models including groups, SCD, and lookup tables.
87
+ - `--simple`: Create a simple dataset including events and users.
88
+
89
+ ## 📄 Examples
90
+
91
+ Check out the `./models/` directory for sample data models:
92
+
93
+ ```bash
94
+ ls ./models/
95
+ ```
96
+
97
+ These models provide a great starting point for creating your own custom data generation scripts.
98
+
99
+ ## 🤝 Contributing
48
100
 
49
- see `./models/` for a few `dataModel.js` examples...
101
+ Feel free to fork this repository and submit pull requests. Contributions are always welcome!
50
102
 
103
+ For any issues or feature requests, please create an issue on the [GitHub repository](https://github.com/ak--47/make-mp-data/issues).
package/index.js CHANGED
@@ -1,6 +1,5 @@
1
1
  #! /usr/bin/env node
2
2
 
3
-
4
3
  /*
5
4
  make fake mixpanel data easily!
6
5
  by AK
@@ -20,9 +19,16 @@ const dayjs = require("dayjs");
20
19
  const utc = require("dayjs/plugin/utc");
21
20
  dayjs.extend(utc);
22
21
  const cliParams = require("./cli.js");
23
- const { makeName, md5 } = require('ak-tools');
22
+ const { makeName, md5, clone, tracker, uid } = require('ak-tools');
24
23
  const NOW = dayjs().unix();
25
24
  let VERBOSE = false;
25
+ let isCLI = false;
26
+
27
+ const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41");
28
+ function track() {
29
+ if (process.env.NODE_ENV === 'test') return;
30
+ metrics.track(...arguments);
31
+ }
26
32
 
27
33
  /** @typedef {import('./types.d.ts').Config} Config */
28
34
  /** @typedef {import('./types.d.ts').EventConfig} EventConfig */
@@ -43,12 +49,13 @@ async function main(config) {
43
49
  favoriteColor: ["red", "green", "blue", "yellow"],
44
50
  spiritAnimal: chance.animal.bind(chance),
45
51
  },
46
- scdProps = { NPS: u.weightedRange(0, 10, 150, 1.6) },
52
+ scdProps = {},
53
+ mirrorProps = {},
47
54
  groupKeys = [],
48
55
  groupProps = {},
49
56
  lookupTables = [],
50
- anonIds = true,
51
- sessionIds = true,
57
+ anonIds = false,
58
+ sessionIds = false,
52
59
  format = "csv",
53
60
  token = null,
54
61
  region = "US",
@@ -58,10 +65,27 @@ async function main(config) {
58
65
  } = config;
59
66
  VERBOSE = verbose;
60
67
  config.simulationName = makeName();
68
+ const { simulationName } = config;
61
69
  global.MP_SIMULATION_CONFIG = config;
62
70
  const uuidChance = new Chance(seed);
71
+ const runId = uid(42);
72
+ track('start simulation', {
73
+ runId,
74
+ seed,
75
+ numEvents,
76
+ numUsers,
77
+ numDays,
78
+ events,
79
+ anonIds,
80
+ sessionIds,
81
+ format,
82
+ token,
83
+ region,
84
+ writeToDisk,
85
+ isCLI
86
+ });
63
87
  log(`------------------SETUP------------------`);
64
- log(`\nyour data simulation will heretofore be known as: \n\n\t${config.simulationName.toUpperCase()}...\n`);
88
+ log(`\nyour data simulation will heretofore be known as: \n\n\t${simulationName.toUpperCase()}...\n`);
65
89
  log(`and your configuration is:\n\n`, JSON.stringify({ seed, numEvents, numUsers, numDays, format, token, region, writeToDisk }, null, 2));
66
90
  log(`------------------SETUP------------------`, "\n");
67
91
 
@@ -100,7 +124,12 @@ async function main(config) {
100
124
  const firstEvents = events.filter((e) => e.isFirstEvent);
101
125
  const eventData = enrichArray([], { hook, type: "event", config });
102
126
  const userProfilesData = enrichArray([], { hook, type: "user", config });
103
- let scdTableData = enrichArray([], { hook, type: "scd", config });
127
+ const scdTableKeys = Object.keys(scdProps);
128
+ const scdTableData = [];
129
+ for (const [index, key] of scdTableKeys.entries()) {
130
+ scdTableData[index] = enrichArray([], { hook, type: "scd", config, scdKey: key });
131
+ }
132
+ // const scdTableData = enrichArray([], { hook, type: "scd", config });
104
133
  const groupProfilesData = enrichArray([], { hook, type: "groups", config });
105
134
  const lookupTableData = enrichArray([], { hook, type: "lookups", config });
106
135
  const avgEvPerUser = Math.floor(numEvents / numUsers);
@@ -112,8 +141,13 @@ async function main(config) {
112
141
  const user = generateUser();
113
142
  const { distinct_id, $created, anonymousIds, sessionIds } = user;
114
143
  userProfilesData.hPush(makeProfile(userProps, user));
115
- const mutations = chance.integer({ min: 1, max: 10 });
116
- scdTableData.hPush(makeSCD(scdProps, distinct_id, mutations, $created));
144
+
145
+ //scd loop
146
+ for (const [index, key] of scdTableKeys.entries()) {
147
+ const mutations = chance.integer({ min: 1, max: 10 });
148
+ scdTableData[index].hPush(makeSCD(scdProps[key], key, distinct_id, mutations, $created));
149
+ }
150
+
117
151
  const numEventsThisUser = Math.round(
118
152
  chance.normal({ mean: avgEvPerUser, dev: avgEvPerUser / u.integer(3, 7) })
119
153
  );
@@ -150,7 +184,7 @@ async function main(config) {
150
184
  }
151
185
 
152
186
  //flatten SCD
153
- scdTableData = scdTableData.flat();
187
+ scdTableData.forEach((table, index) => scdTableData[index] = table.flat());
154
188
 
155
189
  log("\n");
156
190
 
@@ -186,25 +220,56 @@ async function main(config) {
186
220
  }
187
221
  lookupTableData.hPush({ key, data });
188
222
  }
189
- const { eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, folder } =
223
+
224
+ // deal with mirror props
225
+ let mirrorEventData = [];
226
+ const mirrorPropKeys = Object.keys(mirrorProps);
227
+ if (mirrorPropKeys.length) {
228
+ mirrorEventData = clone(eventData);
229
+ for (const row of mirrorEventData) {
230
+ for (const key of mirrorPropKeys) {
231
+ if (mirrorProps[key]?.events?.includes(row?.event)) row[key] = hook(u.choose(mirrorProps[key]?.values), "mirror", { config, row, key });
232
+ if (mirrorProps[key]?.events === "*") row[key] = hook(u.choose(mirrorProps[key]?.values), "mirror", { config, row, key });
233
+ }
234
+ }
235
+ }
236
+
237
+ const { eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder } =
190
238
  buildFileNames(config);
191
239
  const pairs = [
192
- [eventFiles, eventData],
193
- [userFiles, userProfilesData],
240
+ [eventFiles, [eventData]],
241
+ [userFiles, [userProfilesData]],
194
242
  [scdFiles, scdTableData],
195
243
  [groupFiles, groupProfilesData],
196
244
  [lookupFiles, lookupTableData],
245
+ [mirrorFiles, [mirrorEventData]],
197
246
  ];
198
247
  log("\n");
199
248
  log(`---------------SIMULATION----------------`, "\n");
200
249
 
201
250
  if (!writeToDisk && !token) {
251
+ track('end simulation', {
252
+ runId,
253
+ seed,
254
+ numEvents,
255
+ numUsers,
256
+ numDays,
257
+ events,
258
+ anonIds,
259
+ sessionIds,
260
+ format,
261
+ token,
262
+ region,
263
+ writeToDisk,
264
+ isCLI
265
+ });
202
266
  return {
203
267
  eventData,
204
268
  userProfilesData,
205
269
  scdTableData,
206
270
  groupProfilesData,
207
271
  lookupTableData,
272
+ mirrorEventData,
208
273
  import: {},
209
274
  files: []
210
275
  };
@@ -212,32 +277,39 @@ async function main(config) {
212
277
  log(`-----------------WRITES------------------`, `\n\n`);
213
278
  //write the files
214
279
  if (writeToDisk) {
215
- if (verbose) log(`writing files... for ${config.simulationName}`);
216
- loopFiles: for (const pair of pairs) {
217
- const [paths, data] = pair;
280
+ if (verbose) log(`writing files... for ${simulationName}`);
281
+ loopFiles: for (const ENTITY of pairs) {
282
+ const [paths, data] = ENTITY;
218
283
  if (!data.length) continue loopFiles;
219
- for (const path of paths) {
220
- let datasetsToWrite;
221
- if (data?.[0]?.["key"]) datasetsToWrite = data.map((d) => d.data);
222
- else datasetsToWrite = [data];
223
- for (const writeData of datasetsToWrite) {
224
- //if it's a lookup table, it's always a CSV
225
- if (format === "csv" || path.includes("-LOOKUP.csv")) {
226
- log(`\twriting ${path}`);
227
- const columns = u.getUniqueKeys(writeData);
228
- //papa parse needs nested JSON stringified
229
- writeData.forEach((e) => {
230
- for (const key in e) {
231
- if (typeof e[key] === "object") e[key] = JSON.stringify(e[key]);
232
- }
233
- });
234
- const csv = Papa.unparse(writeData, { columns });
235
- await touch(path, csv);
236
- } else {
237
- const ndjson = data.map((d) => JSON.stringify(d)).join("\n");
238
- await touch(path, ndjson, false);
239
- }
284
+ for (const [index, path] of paths.entries()) {
285
+ let TABLE;
286
+ //group + lookup tables are structured differently
287
+ if (data?.[index]?.["key"]) {
288
+ TABLE = data[index].data;
289
+ }
290
+ else {
291
+ TABLE = data[index];
240
292
  }
293
+
294
+ log(`\twriting ${path}`);
295
+ //if it's a lookup table, it's always a CSV
296
+ if (format === "csv" || path.includes("-LOOKUP.csv")) {
297
+ const columns = u.getUniqueKeys(TABLE);
298
+ //papa parse needs each nested field JSON stringified
299
+ TABLE.forEach((e) => {
300
+ for (const key in e) {
301
+ if (typeof e[key] === "object") e[key] = JSON.stringify(e[key]);
302
+ }
303
+ });
304
+
305
+ const csv = Papa.unparse(TABLE, { columns });
306
+ await touch(path, csv);
307
+ }
308
+ else {
309
+ const ndjson = TABLE.map((d) => JSON.stringify(d)).join("\n");
310
+ await touch(path, ndjson, false);
311
+ }
312
+
241
313
  }
242
314
  }
243
315
  }
@@ -298,9 +370,24 @@ async function main(config) {
298
370
 
299
371
  }
300
372
  log(`\n-----------------WRITES------------------`, "\n");
373
+ track('end simulation', {
374
+ runId,
375
+ seed,
376
+ numEvents,
377
+ numUsers,
378
+ numDays,
379
+ events,
380
+ anonIds,
381
+ sessionIds,
382
+ format,
383
+ token,
384
+ region,
385
+ writeToDisk,
386
+ isCLI
387
+ });
301
388
  return {
302
389
  import: importResults,
303
- files: [eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, folder],
390
+ files: [eventFiles, userFiles, scdFiles, groupFiles, lookupFiles, mirrorFiles, folder],
304
391
  };
305
392
  }
306
393
 
@@ -323,16 +410,23 @@ function makeProfile(props, defaults) {
323
410
 
324
411
  return profile;
325
412
  }
326
-
327
- function makeSCD(props, distinct_id, mutations, $created) {
328
- if (JSON.stringify(props) === "{}") return [];
413
+ /**
414
+ * @param {import('./types.d.ts').valueValid} prop
415
+ * @param {string} scdKey
416
+ * @param {string} distinct_id
417
+ * @param {number} mutations
418
+ * @param {string} $created
419
+ */
420
+ function makeSCD(prop, scdKey, distinct_id, mutations, $created) {
421
+ if (JSON.stringify(prop) === "{}") return {};
422
+ if (JSON.stringify(prop) === "[]") return [];
329
423
  const scdEntries = [];
330
424
  let lastInserted = dayjs($created);
331
425
  const deltaDays = dayjs().diff(lastInserted, "day");
332
426
 
333
427
  for (let i = 0; i < mutations; i++) {
334
428
  if (lastInserted.isAfter(dayjs())) break;
335
- const scd = makeProfile(props, { distinct_id });
429
+ const scd = makeProfile({ [scdKey]: prop }, { distinct_id });
336
430
  scd.startTime = lastInserted.toISOString();
337
431
  lastInserted = lastInserted.add(u.integer(1, 1000), "seconds");
338
432
  scd.insertTime = lastInserted.toISOString();
@@ -421,12 +515,21 @@ function buildFileNames(config) {
421
515
  const writePaths = {
422
516
  eventFiles: [path.join(writeDir, `${simName}-EVENTS.${extension}`)],
423
517
  userFiles: [path.join(writeDir, `${simName}-USERS.${extension}`)],
424
- scdFiles: [path.join(writeDir, `${simName}-SCD.${extension}`)],
518
+ scdFiles: [],
519
+ mirrorFiles: [path.join(writeDir, `${simName}-EVENTS-FUTURE-MIRROR.${extension}`)],
425
520
  groupFiles: [],
426
521
  lookupFiles: [],
427
522
  folder: writeDir,
428
523
  };
429
524
 
525
+ //add SCD files
526
+ const scdKeys = Object.keys(config?.scdProps || {});
527
+ for (const key of scdKeys) {
528
+ writePaths.scdFiles.push(
529
+ path.join(writeDir, `${simName}-${key}-SCD.${extension}`)
530
+ );
531
+ }
532
+
430
533
  for (const groupPair of groupKeys) {
431
534
  const groupKey = groupPair[0];
432
535
  writePaths.groupFiles.push(
@@ -447,10 +550,10 @@ function buildFileNames(config) {
447
550
 
448
551
 
449
552
  function enrichArray(arr = [], opts = {}) {
450
- const { hook = a => a, type = "", config = {} } = opts;
553
+ const { hook = a => a, type = "", ...rest } = opts;
451
554
 
452
555
  function transformThenPush(item) {
453
- return arr.push(hook(item, type, config));
556
+ return arr.push(hook(item, type, rest));
454
557
  }
455
558
 
456
559
  arr.hPush = transformThenPush;
@@ -462,6 +565,7 @@ function enrichArray(arr = [], opts = {}) {
462
565
 
463
566
  // this is for CLI
464
567
  if (require.main === module) {
568
+ isCLI = true;
465
569
  const args = cliParams();
466
570
  const { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false } = args;
467
571
  const suppliedConfig = args._[0];
@@ -543,5 +647,13 @@ if (require.main === module) {
543
647
 
544
648
 
545
649
  function log(...args) {
650
+ const cwd = process.cwd(); // Get the current working directory
651
+
652
+ for (let i = 0; i < args.length; i++) {
653
+ // Replace occurrences of the current working directory with "." in string arguments
654
+ if (typeof args[i] === 'string') {
655
+ args[i] = args[i].replace(new RegExp(cwd, 'g'), ".");
656
+ }
657
+ }
546
658
  if (VERBOSE) console.log(...args);
547
659
  }
package/models/complex.js CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * This is the default configuration file for the data generator
2
+ * This is the default configuration file for the data generator in COMPLEX mode
3
3
  * notice how the config object is structured, and see its type definition in ./types.d.ts
4
4
  * feel free to modify this file to customize the data you generate
5
5
  * see helper functions in utils.js for more ways to generate data
@@ -59,7 +59,7 @@ const config = {
59
59
  watchTimeSec: weightedRange(10, 600, 1000, .25),
60
60
  quality: ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"],
61
61
  format: ["mp4", "avi", "mov", "mpg"],
62
- uploader_id: chance.guid.bind(chance)
62
+ video_id: weightedRange(1, 50000, 420000, 1.4),
63
63
 
64
64
  }
65
65
  },
@@ -120,12 +120,22 @@ const config = {
120
120
 
121
121
  },
122
122
 
123
+ /** each generates its own table */
123
124
  scdProps: {
124
125
  plan: ["free", "free", "free", "free", "basic", "basic", "basic", "premium", "premium", "enterprise"],
125
126
  MRR: weightedRange(0, 10000, 1000, .15),
126
127
  NPS: weightedRange(0, 10, 150, 2),
127
- marketingOptIn: [true, true, false],
128
- dateOfRenewal: date(100, false),
128
+ subscribed: [true, true, true, true, true, true, false, false, false, false, "it's complicated"],
129
+ renewalDate: date(100, false),
130
+ },
131
+
132
+ mirrorProps: {
133
+ isBot: { events: "*", values: [false, false, false, false, true] },
134
+ profit: { events: ["checkout"], values: [4, 2, 42, 420] },
135
+ watchTimeSec: {
136
+ events: ["watch video"],
137
+ values: weightedRange(50, 1200, 247, 6)
138
+ }
129
139
  },
130
140
 
131
141
  /*
@@ -133,7 +143,8 @@ const config = {
133
143
  each pair represents a group_key and the number of profiles for that key
134
144
  */
135
145
  groupKeys: [
136
- ['company_id', 350],
146
+ ['company_id', 500],
147
+ ['room_id', 10000],
137
148
 
138
149
  ],
139
150
  groupProps: {
@@ -144,6 +155,15 @@ const config = {
144
155
  "industry": ["tech", "finance", "healthcare", "education", "government", "non-profit"],
145
156
  "segment": ["enterprise", "SMB", "mid-market"],
146
157
  "products": [["core"], ["core"], ["core", "add-ons"], ["core", "pro-serve"], ["core", "add-ons", "pro-serve"], ["core", "BAA", "enterprise"], ["free"], ["free"], ["free", "addons"]],
158
+ },
159
+ room_id: {
160
+ $name: () => { return `#${chance.word({ length: integer(4, 24), capitalize: true })}`; },
161
+ $email: ["public", "private"],
162
+ "room provider": ["partner", "core", "core", "core"],
163
+ "room capacity": weightedRange(3, 1000000),
164
+ "isPublic": [true, false, false, false, false],
165
+ "country": chance.country.bind(chance),
166
+ "isVerified": [true, true, false, false, false],
147
167
  }
148
168
  },
149
169
 
@@ -153,29 +173,7 @@ const config = {
153
173
  entries: 1000,
154
174
  attributes: {
155
175
  category: [
156
- "Books",
157
- "Movies",
158
- "Music",
159
- "Games",
160
- "Electronics",
161
- "Computers",
162
- "Smart Home",
163
- "Home",
164
- "Garden & Tools",
165
- "Pet Supplies",
166
- "Food & Grocery",
167
- "Beauty",
168
- "Health",
169
- "Toys",
170
- "Kids",
171
- "Baby",
172
- "Handmade",
173
- "Sports",
174
- "Outdoors",
175
- "Automotive",
176
- "Industrial",
177
- "Entertainment",
178
- "Art"
176
+ "Books", "Movies", "Music", "Games", "Electronics", "Computers", "Smart Home", "Home", "Garden & Tools", "Pet Supplies", "Food & Grocery", "Beauty", "Health", "Toys", "Kids", "Baby", "Handmade", "Sports", "Outdoors", "Automotive", "Industrial", "Entertainment", "Art"
179
177
  ],
180
178
  "demand": ["high", "medium", "medium", "low"],
181
179
  "supply": ["high", "medium", "medium", "low"],
@@ -185,6 +183,20 @@ const config = {
185
183
  "reviews": weightedRange(0, 35)
186
184
  }
187
185
 
186
+ },
187
+ {
188
+ key: "video_id",
189
+ entries: 50000,
190
+ attributes: {
191
+ isFlagged: [true, false, false, false, false],
192
+ copyright: ["all rights reserved", "creative commons", "creative commons", "public domain", "fair use"],
193
+ uploader_id: chance.guid.bind(chance),
194
+ "uploader influence": ["low", "low", "low", "medium", "medium", "high"],
195
+ rating: weightedRange(1, 5),
196
+ thumbs: weightedRange(0, 35),
197
+ rating: ["G", "PG", "PG-13", "R", "NC-17", "PG-13", "R", "NC-17", "R", "PG", "PG"]
198
+ }
199
+
188
200
  }
189
201
  ],
190
202
 
@@ -18,17 +18,17 @@ const config = {
18
18
  groupProperties: {}
19
19
  };
20
20
 
21
- let formats = ['2160p', '1440p', '1080p', '720p', '480p', '360p', '240p'];
22
- let ratios = ['4:3', '16:10', '16:9'];
23
- let containers = ["WEBM", ["MPG", "MP2", "MPEG"], ["MP4", "M4P", "M4V"], ["AVI", "WMV"], ["MOV", "QT"], ["FLV", "SWF"], "AVCHD"];
24
- let hashtags = ["#AK", "#bitcoin", "#cureForMiley", "#faceValue", "#blm", "#fwiw", "#inappropriateFuneralSongs", "#jurassicPork", "#lolCats", "#wheatForSheep", "#momTexts", "#myWeirdGymStory", "#poppy", "#resist", "#tbt", "#wilson", "#worstGiftEver", "#yolo", "#phish", "#crypto", "#memes", "#wrongMovie", "#careerEndingTwitterTypos", "#twoThingsThatDontMix"];
25
- let platforms = ["Web", "Mobile Web", "Native (Android)", "Native (iOS)", "Native (Desktop)", "IoT"];
26
- let plans = ['free', 'premium', 'casual', 'influencer'];
27
- let categories = ["Product reviews video", "How-to videos", "Vlogs", "Gaming videos", "Comedy/skit videos", "Haul videos", "Memes/tags", "Favorites/best of", "Educational videos", "Unboxing videos", "Q&A videos", "Collection", "Prank videos"];
28
- let marketingChannels = ["Organic", "Organic", "Organic", "Organic", "Instagram Ads", "Facebook Ads", "Google Ads", "Youtube Ads", "Instagram Post", "Instagram Post", "Facebook Post"];
29
-
30
21
 
31
22
  function generateVideoMeta() {
23
+ let formats = ['2160p', '1440p', '1080p', '720p', '480p', '360p', '240p'];
24
+ let ratios = ['4:3', '16:10', '16:9'];
25
+ let containers = ["WEBM", ["MPG", "MP2", "MPEG"], ["MP4", "M4P", "M4V"], ["AVI", "WMV"], ["MOV", "QT"], ["FLV", "SWF"], "AVCHD"];
26
+ let hashtags = ["#AK", "#bitcoin", "#cureForMiley", "#faceValue", "#blm", "#fwiw", "#inappropriateFuneralSongs", "#jurassicPork", "#lolCats", "#wheatForSheep", "#momTexts", "#myWeirdGymStory", "#poppy", "#resist", "#tbt", "#wilson", "#worstGiftEver", "#yolo", "#phish", "#crypto", "#memes", "#wrongMovie", "#careerEndingTwitterTypos", "#twoThingsThatDontMix"];
27
+ let platforms = ["Web", "Mobile Web", "Native (Android)", "Native (iOS)", "Native (Desktop)", "IoT"];
28
+ let plans = ['free', 'premium', 'casual', 'influencer'];
29
+ let categories = ["Product reviews video", "How-to videos", "Vlogs", "Gaming videos", "Comedy/skit videos", "Haul videos", "Memes/tags", "Favorites/best of", "Educational videos", "Unboxing videos", "Q&A videos", "Collection", "Prank videos"];
30
+ let marketingChannels = ["Organic", "Organic", "Organic", "Organic", "Instagram Ads", "Facebook Ads", "Google Ads", "Youtube Ads", "Instagram Post", "Instagram Post", "Facebook Post"];
31
+
32
32
 
33
33
  let videoTemplate = {
34
34
  videoFormatInfo: {
package/models/simple.js CHANGED
@@ -1,3 +1,13 @@
1
+ /**
2
+ * This is the default configuration file for the data generator in SIMPLE mode
3
+ * notice how the config object is structured, and see its type definition in ./types.d.ts
4
+ * feel free to modify this file to customize the data you generate
5
+ * see helper functions in utils.js for more ways to generate data
6
+ */
7
+
8
+
9
+
10
+
1
11
  const Chance = require('chance');
2
12
  const chance = new Chance();
3
13
  const dayjs = require("dayjs");
@@ -24,9 +34,9 @@ const config = {
24
34
 
25
35
  events: [
26
36
  {
27
- "event": "checkout",
28
- "weight": 2,
29
- "properties": {
37
+ event: "checkout",
38
+ weight: 2,
39
+ properties: {
30
40
  amount: weightedRange(5, 500, 1000, .25),
31
41
  currency: ["USD", "CAD", "EUR", "BTC", "ETH", "JPY"],
32
42
  coupon: ["none", "none", "none", "none", "10%OFF", "20%OFF", "10%OFF", "20%OFF", "30%OFF", "40%OFF", "50%OFF"],
@@ -35,9 +45,9 @@ const config = {
35
45
  }
36
46
  },
37
47
  {
38
- "event": "add to cart",
39
- "weight": 4,
40
- "properties": {
48
+ event: "add to cart",
49
+ weight: 4,
50
+ properties: {
41
51
  amount: weightedRange(5, 500, 1000, .25),
42
52
  rating: weightedRange(1, 5),
43
53
  reviews: weightedRange(0, 35),
@@ -48,17 +58,17 @@ const config = {
48
58
  }
49
59
  },
50
60
  {
51
- "event": "page view",
52
- "weight": 10,
53
- "properties": {
61
+ event: "page view",
62
+ weight: 10,
63
+ properties: {
54
64
  page: ["/", "/", "/help", "/account", "/watch", "/listen", "/product", "/people", "/peace"],
55
65
  utm_source: ["$organic", "$organic", "$organic", "$organic", "google", "google", "google", "facebook", "facebook", "twitter", "linkedin"],
56
66
  }
57
67
  },
58
68
  {
59
- "event": "watch video",
60
- "weight": 8,
61
- "properties": {
69
+ event: "watch video",
70
+ weight: 8,
71
+ properties: {
62
72
  videoCategory: weighList(videoCategories, integer(0, 9)),
63
73
  isFeaturedItem: [true, false, false],
64
74
  watchTimeSec: weightedRange(10, 600, 1000, .25),
@@ -69,9 +79,9 @@ const config = {
69
79
  }
70
80
  },
71
81
  {
72
- "event": "view item",
73
- "weight": 8,
74
- "properties": {
82
+ event: "view item",
83
+ weight: 8,
84
+ properties: {
75
85
  isFeaturedItem: [true, false, false],
76
86
  itemCategory: weighList(itemCategories, integer(0, 27)),
77
87
  dateItemListed: date(30, true, 'YYYY-MM-DD'),
@@ -79,9 +89,9 @@ const config = {
79
89
  }
80
90
  },
81
91
  {
82
- "event": "save item",
83
- "weight": 5,
84
- "properties": {
92
+ event: "save item",
93
+ weight: 5,
94
+ properties: {
85
95
  isFeaturedItem: [true, false, false],
86
96
  itemCategory: weighList(itemCategories, integer(0, 27)),
87
97
  dateItemListed: date(30, true, 'YYYY-MM-DD'),
@@ -89,10 +99,10 @@ const config = {
89
99
  }
90
100
  },
91
101
  {
92
- "event": "sign up",
93
- "isFirstEvent": true,
94
- "weight": 0,
95
- "properties": {
102
+ event: "sign up",
103
+ isFirstEvent: true,
104
+ weight: 0,
105
+ properties: {
96
106
  variants: ["A", "B", "C", "Control"],
97
107
  flows: ["new", "existing", "loyal", "churned"],
98
108
  flags: ["on", "off"],
@@ -118,6 +128,15 @@ const config = {
118
128
  },
119
129
 
120
130
  scdProps: {},
131
+ mirrorProps: {
132
+ isBot: { events: "*", values: [false, false, false, false, true] },
133
+ profit: { events: ["checkout"], values: [4, 2, 42, 420] },
134
+ watchTimeSec: {
135
+ events: ["watch video"],
136
+ values: weightedRange(50, 1200, 247, 6)
137
+ }
138
+
139
+ },
121
140
 
122
141
  /*
123
142
  for group analytics keys, we need an array of arrays [[],[],[]]
package/package.json CHANGED
@@ -1,17 +1,17 @@
1
1
  {
2
2
  "name": "make-mp-data",
3
- "version": "1.2.25",
3
+ "version": "1.3.0",
4
4
  "description": "builds all mixpanel primitives for a given project",
5
5
  "main": "index.js",
6
6
  "types": "types.d.ts",
7
7
  "scripts": {
8
8
  "start": "node index.js",
9
9
  "dev": "./scripts/go.sh",
10
- "complex": "nodemon index.js --complex",
11
- "simple": "nodemon index.js",
10
+ "complex": "nodemon index.js --complex --e 10000 --u 100",
11
+ "simple": "nodemon index.js --simple --e 10000 --u 100",
12
12
  "prune": "rm ./data/*",
13
13
  "post": "npm publish",
14
- "test": "jest --runInBand",
14
+ "test": "NODE_ENV=test jest --runInBand",
15
15
  "deps": "sh ./scripts/deps.sh"
16
16
  },
17
17
  "repository": {
@@ -40,7 +40,7 @@
40
40
  },
41
41
  "homepage": "https://github.com/ak--47/make-mp-data#readme",
42
42
  "dependencies": {
43
- "ak-tools": "^1.0.57",
43
+ "ak-tools": "^1.0.58",
44
44
  "chance": "^1.1.11",
45
45
  "dayjs": "^1.11.11",
46
46
  "mixpanel-import": "^2.5.51",
package/tests/e2e.test.js CHANGED
@@ -24,7 +24,7 @@ describe('module', () => {
24
24
  expect(eventData.length).toBeGreaterThan(980);
25
25
  expect(groupProfilesData.length).toBe(0);
26
26
  expect(lookupTableData.length).toBe(0);
27
- expect(scdTableData.length).toBeGreaterThan(200);
27
+ expect(scdTableData.length).toBe(0);
28
28
  expect(userProfilesData.length).toBe(100);
29
29
 
30
30
  }, timeout);
@@ -46,10 +46,10 @@ describe('module', () => {
46
46
  const results = await generate({ ...complex, verbose: true, writeToDisk: false, numEvents: 1100, numUsers: 100, seed: "deal with it" });
47
47
  const { eventData, groupProfilesData, lookupTableData, scdTableData, userProfilesData } = results;
48
48
  expect(eventData.length).toBeGreaterThan(980);
49
- expect(groupProfilesData[0]?.data?.length).toBe(350);
50
- expect(lookupTableData.length).toBe(1);
49
+ expect(groupProfilesData[0]?.data?.length).toBe(500);
50
+ expect(lookupTableData.length).toBe(2);
51
51
  expect(lookupTableData[0].data.length).toBe(1000);
52
- expect(scdTableData.length).toBeGreaterThan(200);
52
+ expect(scdTableData.length).toBe(5);
53
53
  expect(userProfilesData.length).toBe(100);
54
54
 
55
55
  }, timeout);
@@ -61,21 +61,30 @@ describe('module', () => {
61
61
  expect(eventData.length).toBeGreaterThan(980);
62
62
  expect(groupProfilesData.length).toBe(0);
63
63
  expect(lookupTableData.length).toBe(0);
64
- expect(scdTableData.length).toBeGreaterThan(200);
64
+ expect(scdTableData.length).toBe(0);
65
65
  expect(userProfilesData.length).toBe(100);
66
66
 
67
67
  }, timeout);
68
68
 
69
+ test('fails with invalid configuration', async () => {
70
+ try {
71
+ await generate({ numUsers: -10 });
72
+ } catch (e) {
73
+ expect(e).toBeDefined();
74
+ }
75
+ }, timeout);
76
+
77
+
69
78
 
70
79
  });
71
80
 
72
81
  describe('cli', () => {
73
82
  test('works as CLI (complex)', async () => {
74
83
  console.log('COMPLEX CLI TEST');
75
- const run = execSync(`node ./index.js --numEvents 1000 --numUsers 100 --seed "deal with it" --complex`);
76
- expect(run.toString().trim().includes('have a wonderful day :)')).toBe(true);
84
+ const run = execSync(`node ./index.js --numEvents 1000 --numUsers 100 --seed "deal with it" --complex`, { stdio: 'ignore' });
85
+ // expect(run.toString().trim().includes('have a wonderful day :)')).toBe(true);
77
86
  const csvs = (await u.ls('./data')).filter(a => a.includes('.csv'));
78
- expect(csvs.length).toBe(5);
87
+ expect(csvs.length).toBe(12);
79
88
  clearData();
80
89
  }, timeout);
81
90
 
@@ -84,7 +93,7 @@ describe('cli', () => {
84
93
  const run = execSync(`node ./index.js --numEvents 1000 --numUsers 100 --seed "deal with it"`);
85
94
  expect(run.toString().trim().includes('have a wonderful day :)')).toBe(true);
86
95
  const csvs = (await u.ls('./data')).filter(a => a.includes('.csv'));
87
- expect(csvs.length).toBe(2);
96
+ expect(csvs.length).toBe(3);
88
97
  clearData();
89
98
  }, timeout);
90
99
 
@@ -93,12 +102,13 @@ describe('cli', () => {
93
102
  const run = execSync(`node ./index.js ./models/deepNest.js`);
94
103
  expect(run.toString().trim().includes('have a wonderful day :)')).toBe(true);
95
104
  const csvs = (await u.ls('./data')).filter(a => a.includes('.csv'));
96
- expect(csvs.length).toBe(3);
105
+ expect(csvs.length).toBe(2);
97
106
  clearData();
98
107
  }, timeout);
99
108
 
100
109
  });
101
110
 
111
+
102
112
  describe('options + tweaks', () => {
103
113
  test('creates sessionIds', async () => {
104
114
  const results = await generate({ writeToDisk: false, numEvents: 1000, numUsers: 100, sessionIds: true });
@@ -60,7 +60,7 @@ describe('utils', () => {
60
60
  const generatedPerson = person();
61
61
  expect(generatedPerson).toHaveProperty('$name');
62
62
  expect(generatedPerson).toHaveProperty('$email');
63
- expect(generatedPerson).toHaveProperty('$avatar');
63
+ expect(generatedPerson).toHaveProperty('$avatar');
64
64
  });
65
65
 
66
66
 
@@ -96,6 +96,38 @@ describe('utils', () => {
96
96
  expect(result).toBe('test');
97
97
  });
98
98
 
99
+ test('choose: non-function / non-array', () => {
100
+ expect(choose('test')).toBe('test');
101
+ expect(choose(123)).toBe(123);
102
+ });
103
+
104
+ test('choose: nested functions', () => {
105
+ const result = choose(() => () => () => 'nested');
106
+ expect(result).toBe('nested');
107
+ });
108
+
109
+ test('weightedRange: within range', () => {
110
+ const values = weightedRange(5, 15, 100);
111
+ expect(values.every(v => v >= 5 && v <= 15)).toBe(true);
112
+ expect(values.length).toBe(100);
113
+ });
114
+
115
+ test('applySkew: skews', () => {
116
+ const value = boxMullerRandom();
117
+ const skewedValue = applySkew(value, .25);
118
+ expect(Math.abs(skewedValue)).toBeGreaterThanOrEqual(Math.abs(value));
119
+ });
120
+
121
+ test('mapToRange: works', () => {
122
+ const value = 0;
123
+ const mean = 10;
124
+ const sd = 5;
125
+ const mappedValue = mapToRange(value, mean, sd);
126
+ expect(mappedValue).toBe(10);
127
+ });
128
+
129
+
130
+
99
131
  test('exhaust: elements', () => {
100
132
  const arr = [1, 2, 3];
101
133
  const exhaustFn = exhaust([...arr]);
@@ -161,7 +193,7 @@ describe('utils', () => {
161
193
  expect(typeof emojis).toBe('string');
162
194
  const emojiArray = emojis.split(', ');
163
195
  expect(emojiArray.length).toBeLessThanOrEqual(10); // Assuming max default is 10
164
-
196
+
165
197
  });
166
198
 
167
199
 
package/types.d.ts CHANGED
@@ -1,82 +1,114 @@
1
1
  declare namespace main {
2
- type primitives = string | number | boolean | Date | Object;
3
- type valueValid =
4
- | primitives
5
- | primitives[]
6
- | (() => primitives | primitives[]);
7
-
8
- export interface Config {
9
- token?: string;
10
- seed?: string;
11
- numDays?: number;
12
- numEvents?: number;
13
- numUsers?: number;
14
- format?: "csv" | "json";
15
- region?: string;
16
- events?: EventConfig[];
17
- superProps?: Record<string, valueValid>;
18
- userProps?: Record<string, valueValid>;
19
- scdProps?: Record<string, valueValid>;
20
- groupKeys?: [string, number][];
21
- groupProps?: Record<string, GroupProperty>; // Adjust according to usage
22
- lookupTables?: LookupTable[];
23
- writeToDisk?: boolean;
24
- simulationName?: string;
25
- verbose?: boolean;
26
- anonIds?: boolean;
27
- sessionIds?: boolean;
28
- hook?: Hook;
2
+ type Primitives = string | number | boolean | Date | Record<string, any>;
3
+
4
+ // Recursive type to handle functions returning functions that eventually return Primitives or arrays of Primitives
5
+ type ValueValid =
6
+ | Primitives
7
+ | ValueValid[]
8
+ | (() => ValueValid);
9
+
10
+ // MAIN CONFIGURATION OBJECT
11
+ export interface Config {
12
+ token?: string;
13
+ seed?: string;
14
+ numDays?: number;
15
+ numEvents?: number;
16
+ numUsers?: number;
17
+ format?: "csv" | "json";
18
+ region?: string;
19
+ events?: EventConfig[];
20
+ superProps?: Record<string, ValueValid>;
21
+ userProps?: Record<string, ValueValid>;
22
+ scdProps?: Record<string, ValueValid>;
23
+ mirrorProps?: Record<string, MirrorProps>;
24
+ groupKeys?: [string, number][];
25
+ groupProps?: Record<string, Record<string, ValueValid>>;
26
+ lookupTables?: LookupTable[];
27
+ writeToDisk?: boolean;
28
+ simulationName?: string;
29
+ verbose?: boolean;
30
+ anonIds?: boolean;
31
+ sessionIds?: boolean;
32
+ hook?: Hook;
33
+ }
34
+
35
+ export type Hook = (record: any, type: string, meta: any) => any;
36
+
37
+ export interface EventConfig {
38
+ event?: string;
39
+ weight?: number;
40
+ properties?: Record<string, ValueValid>;
41
+ isFirstEvent?: boolean;
42
+ }
43
+
44
+ export interface MirrorProps {
45
+ events: string[] | "*";
46
+ values: ValueValid[];
47
+ }
48
+
49
+ export interface LookupTable {
50
+ key: string;
51
+ entries: number;
52
+ attributes: Record<string, ValueValid>;
53
+ }
54
+
55
+ export interface SCDTable {
56
+ distinct_id: string;
57
+ insertTime: string;
58
+ startTime: string;
59
+ [key: string]: ValueValid;
60
+ }
61
+
62
+ export type Result = {
63
+ eventData: EventData[];
64
+ userProfilesData: any[];
65
+ scdTableData: any[];
66
+ groupProfilesData: GroupProfilesData[];
67
+ lookupTableData: LookupTableData[];
68
+ import?: ImportResults;
69
+ files?: string[];
70
+ };
71
+
72
+ export interface EventData {
73
+ event: string;
74
+ $source: string;
75
+ time: string;
76
+ $device_id?: string;
77
+ $session_id?: string;
78
+ $user_id?: string;
79
+ [key: string]: any;
80
+ }
81
+
82
+ export interface GroupProfilesData {
83
+ key: string;
84
+ data: any[];
85
+ }
86
+
87
+ export interface LookupTableData {
88
+ key: string;
89
+ data: any[];
90
+ }
91
+
92
+ export interface ImportResults {
93
+ events: ImportResult;
94
+ users: ImportResult;
95
+ groups: ImportResult[];
96
+ }
97
+
98
+ export interface ImportResult {
99
+ success: number;
100
+ bytes: number;
101
+ }
29
102
  }
30
-
31
- export type Hook = (record: any, type: string, meta: any) => any;
32
-
33
- interface EventConfig {
34
- event?: string;
35
- weight?: number;
36
- properties?: {
37
- [key: string]: valueValid; // Consider refining based on actual properties used
38
- };
39
- isFirstEvent?: boolean;
40
- }
41
-
42
- interface GroupProperty {
43
- [key?: string]: valueValid;
44
- }
45
-
46
- interface LookupTable {
47
- key: string;
48
- entries: number;
49
- attributes: {
50
- [key?: string]: valueValid;
51
- };
52
- }
53
-
54
- type Result = {
55
- eventData: {
56
- event: any;
57
- $source: string;
58
- }[];
59
- userProfilesData: any[];
60
- scdTableData: any[];
61
- groupProfilesData: {
62
- key: string;
63
- data: any[];
64
- }[];
65
- lookupTableData: {
66
- key: string;
67
- data: any[];
68
- }[];
69
- import?: undefined;
70
- files?: undefined;
71
- };
72
- }
73
-
74
- /**
75
- * Mixpanel Data Generator
76
- * model events, users, groups, and lookup tables (and SCD props!)
77
- * @example
78
- * const gen = require('make-mp-data')
79
- * const dta = gen({writeToDisk: false})
80
- */
81
- declare function main(config: main.Config): Promise<main.Result>;
82
- export = main;
103
+
104
+ /**
105
+ * Mixpanel Data Generator
106
+ * model events, users, groups, and lookup tables (and SCD props!)
107
+ * @example
108
+ * const gen = require('make-mp-data')
109
+ * const dta = gen({writeToDisk: false})
110
+ */
111
+ declare function main(config: main.Config): Promise<main.Result>;
112
+
113
+ export = main;
114
+
package/utils.js CHANGED
@@ -155,7 +155,7 @@ function mapToRange(value, mean, sd) {
155
155
  return Math.round(value * sd + mean);
156
156
  };
157
157
 
158
- function weightedRange(min, max, size = 100, skew = 1) {
158
+ function unOptimizedWeightedRange(min, max, size = 100, skew = 1) {
159
159
  const mean = (max + min) / 2;
160
160
  const sd = (max - min) / 4;
161
161
  let array = [];
@@ -176,6 +176,23 @@ function weightedRange(min, max, size = 100, skew = 1) {
176
176
  return array;
177
177
  };
178
178
 
179
+ // optimized weighted range
180
+ function weightedRange(min, max, size = 100, skew = 1) {
181
+ const mean = (max + min) / 2;
182
+ const sd = (max - min) / 4;
183
+ const array = [];
184
+ while (array.length < size) {
185
+ const normalValue = boxMullerRandom();
186
+ const skewedValue = applySkew(normalValue, skew);
187
+ const mappedValue = mapToRange(skewedValue, mean, sd);
188
+ if (mappedValue >= min && mappedValue <= max) {
189
+ array.push(mappedValue);
190
+ }
191
+ }
192
+ return array;
193
+ }
194
+
195
+
179
196
  function progress(thing, p) {
180
197
  readline.cursorTo(process.stdout, 0);
181
198
  process.stdout.write(`${thing} processed ... ${comma(p)}`);