make-mp-data 1.1.19 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.vscode/settings.json +1 -0
- package/README.md +1 -1
- package/cli.js +19 -1
- package/index.js +49 -127
- package/{default.js → models/complex.js} +3 -3
- package/models/deepNest.js +102 -0
- package/models/simple.js +133 -0
- package/package.json +2 -2
- package/tests/e2e.test.js +98 -41
- package/tests/unit.test.js +155 -0
- package/timesoup.js +92 -0
- package/utils.js +67 -101
package/.vscode/settings.json
CHANGED
package/README.md
CHANGED
package/cli.js
CHANGED
|
@@ -35,7 +35,7 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
|
|
|
35
35
|
type: 'string'
|
|
36
36
|
})
|
|
37
37
|
.option("seed", {
|
|
38
|
-
demandOption: false,
|
|
38
|
+
demandOption: false,
|
|
39
39
|
alias: 's',
|
|
40
40
|
describe: 'randomness seed; used to create distinct_ids',
|
|
41
41
|
type: 'string'
|
|
@@ -72,6 +72,24 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
|
|
|
72
72
|
describe: 'either US or EU',
|
|
73
73
|
type: 'string'
|
|
74
74
|
})
|
|
75
|
+
.options("complex", {
|
|
76
|
+
demandOption: false,
|
|
77
|
+
default: false,
|
|
78
|
+
describe: 'use complex data model (model all entities)',
|
|
79
|
+
alias: 'c',
|
|
80
|
+
type: 'boolean',
|
|
81
|
+
coerce: (value) => {
|
|
82
|
+
if (typeof value === 'boolean') return value;
|
|
83
|
+
if (value === 'true') {
|
|
84
|
+
return true;
|
|
85
|
+
}
|
|
86
|
+
if (value === 'false') {
|
|
87
|
+
return false;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
return true;
|
|
91
|
+
}
|
|
92
|
+
})
|
|
75
93
|
.option("writeToDisk", {
|
|
76
94
|
demandOption: false,
|
|
77
95
|
default: true,
|
package/index.js
CHANGED
|
@@ -14,23 +14,8 @@ const Chance = require("chance");
|
|
|
14
14
|
const chance = new Chance();
|
|
15
15
|
const { touch, comma, bytesHuman, mkdir } = require("ak-tools");
|
|
16
16
|
const Papa = require("papaparse");
|
|
17
|
-
const
|
|
18
|
-
|
|
19
|
-
pick,
|
|
20
|
-
day,
|
|
21
|
-
integer,
|
|
22
|
-
makeProducts,
|
|
23
|
-
date,
|
|
24
|
-
progress,
|
|
25
|
-
choose,
|
|
26
|
-
range,
|
|
27
|
-
exhaust,
|
|
28
|
-
openFinder,
|
|
29
|
-
applySkew,
|
|
30
|
-
boxMullerRandom,
|
|
31
|
-
getUniqueKeys,
|
|
32
|
-
person
|
|
33
|
-
} = require("./utils.js");
|
|
17
|
+
const u = require("./utils.js");
|
|
18
|
+
const AKsTimeSoup = require("./timesoup.js");
|
|
34
19
|
const dayjs = require("dayjs");
|
|
35
20
|
const utc = require("dayjs/plugin/utc");
|
|
36
21
|
dayjs.extend(utc);
|
|
@@ -42,20 +27,6 @@ let VERBOSE = false;
|
|
|
42
27
|
/** @typedef {import('./types.d.ts').Config} Config */
|
|
43
28
|
/** @typedef {import('./types.d.ts').EventConfig} EventConfig */
|
|
44
29
|
|
|
45
|
-
|
|
46
|
-
const PEAK_DAYS = [
|
|
47
|
-
dayjs().subtract(2, "day").unix(),
|
|
48
|
-
dayjs().subtract(3, "day").unix(),
|
|
49
|
-
dayjs().subtract(5, "day").unix(),
|
|
50
|
-
dayjs().subtract(7, "day").unix(),
|
|
51
|
-
dayjs().subtract(11, "day").unix(),
|
|
52
|
-
dayjs().subtract(13, "day").unix(),
|
|
53
|
-
dayjs().subtract(17, "day").unix(),
|
|
54
|
-
dayjs().subtract(19, "day").unix(),
|
|
55
|
-
dayjs().subtract(23, "day").unix(),
|
|
56
|
-
dayjs().subtract(29, "day").unix(),
|
|
57
|
-
];
|
|
58
|
-
|
|
59
30
|
/**
|
|
60
31
|
* generates fake mixpanel data
|
|
61
32
|
* @param {Config} config
|
|
@@ -72,7 +43,7 @@ async function main(config) {
|
|
|
72
43
|
favoriteColor: ["red", "green", "blue", "yellow"],
|
|
73
44
|
spiritAnimal: chance.animal,
|
|
74
45
|
},
|
|
75
|
-
scdProps = { NPS: weightedRange(0, 10, 150, 1.6) },
|
|
46
|
+
scdProps = { NPS: u.weightedRange(0, 10, 150, 1.6) },
|
|
76
47
|
groupKeys = [],
|
|
77
48
|
groupProps = {},
|
|
78
49
|
lookupTables = [],
|
|
@@ -86,7 +57,7 @@ async function main(config) {
|
|
|
86
57
|
} = config;
|
|
87
58
|
VERBOSE = verbose;
|
|
88
59
|
config.simulationName = makeName();
|
|
89
|
-
global.
|
|
60
|
+
global.MP_SIMULATION_CONFIG = config;
|
|
90
61
|
const uuidChance = new Chance(seed);
|
|
91
62
|
log(`------------------SETUP------------------`);
|
|
92
63
|
log(`\nyour data simulation will heretofore be known as: \n\n\t${config.simulationName.toUpperCase()}...\n`);
|
|
@@ -97,18 +68,18 @@ async function main(config) {
|
|
|
97
68
|
//the function which generates $distinct_id + $anonymous_ids, $session_ids, and $created, skewing towards the present
|
|
98
69
|
function generateUser() {
|
|
99
70
|
const distinct_id = uuidChance.guid();
|
|
100
|
-
let z = boxMullerRandom();
|
|
71
|
+
let z = u.boxMullerRandom();
|
|
101
72
|
const skew = chance.normal({ mean: 10, dev: 3 });
|
|
102
|
-
z = applySkew(z, skew);
|
|
73
|
+
z = u.applySkew(z, skew);
|
|
103
74
|
|
|
104
75
|
// Scale and shift the normally distributed value to fit the range of days
|
|
105
|
-
const maxZ = integer(2, 4);
|
|
76
|
+
const maxZ = u.integer(2, 4);
|
|
106
77
|
const scaledZ = (z / maxZ + 1) / 2;
|
|
107
78
|
const daysAgoBorn = Math.round(scaledZ * (numDays - 1)) + 1;
|
|
108
79
|
|
|
109
80
|
return {
|
|
110
81
|
distinct_id,
|
|
111
|
-
...person(daysAgoBorn),
|
|
82
|
+
...u.person(daysAgoBorn),
|
|
112
83
|
};
|
|
113
84
|
}
|
|
114
85
|
|
|
@@ -134,16 +105,16 @@ async function main(config) {
|
|
|
134
105
|
const avgEvPerUser = Math.floor(numEvents / numUsers);
|
|
135
106
|
|
|
136
107
|
//user loop
|
|
137
|
-
log(`---------------SIMULATION----------------`,
|
|
108
|
+
log(`---------------SIMULATION----------------`, "\n\n");
|
|
138
109
|
for (let i = 1; i < numUsers + 1; i++) {
|
|
139
|
-
progress("users", i);
|
|
110
|
+
u.progress("users", i);
|
|
140
111
|
const user = generateUser();
|
|
141
112
|
const { distinct_id, $created, anonymousIds, sessionIds } = user;
|
|
142
113
|
userProfilesData.push(makeProfile(userProps, user));
|
|
143
114
|
const mutations = chance.integer({ min: 1, max: 10 });
|
|
144
115
|
scdTableData.push(makeSCD(scdProps, distinct_id, mutations, $created));
|
|
145
116
|
const numEventsThisUser = Math.round(
|
|
146
|
-
chance.normal({ mean: avgEvPerUser, dev: avgEvPerUser / integer(3, 7) })
|
|
117
|
+
chance.normal({ mean: avgEvPerUser, dev: avgEvPerUser / u.integer(3, 7) })
|
|
147
118
|
);
|
|
148
119
|
|
|
149
120
|
if (firstEvents.length) {
|
|
@@ -187,7 +158,7 @@ async function main(config) {
|
|
|
187
158
|
const groupCardinality = groupPair[1];
|
|
188
159
|
const groupProfiles = [];
|
|
189
160
|
for (let i = 1; i < groupCardinality + 1; i++) {
|
|
190
|
-
progress("groups", i);
|
|
161
|
+
u.progress("groups", i);
|
|
191
162
|
const group = {
|
|
192
163
|
[groupKey]: i,
|
|
193
164
|
...makeProfile(groupProps[groupKey]),
|
|
@@ -204,7 +175,7 @@ async function main(config) {
|
|
|
204
175
|
const { key, entries, attributes } = lookupTable;
|
|
205
176
|
const data = [];
|
|
206
177
|
for (let i = 1; i < entries + 1; i++) {
|
|
207
|
-
progress("lookups", i);
|
|
178
|
+
u.progress("lookups", i);
|
|
208
179
|
const item = {
|
|
209
180
|
[key]: i,
|
|
210
181
|
...makeProfile(attributes),
|
|
@@ -236,17 +207,19 @@ async function main(config) {
|
|
|
236
207
|
log(`-----------------WRITES------------------`, `\n\n`);
|
|
237
208
|
//write the files
|
|
238
209
|
if (writeToDisk) {
|
|
239
|
-
if (verbose) log(`writing files... for ${config.simulationName}`);
|
|
240
|
-
for (const pair of pairs) {
|
|
210
|
+
if (verbose) log(`writing files... for ${config.simulationName}\n`);
|
|
211
|
+
loopFiles: for (const pair of pairs) {
|
|
241
212
|
const [paths, data] = pair;
|
|
213
|
+
if (!data.length) continue loopFiles;
|
|
242
214
|
for (const path of paths) {
|
|
243
215
|
let datasetsToWrite;
|
|
244
216
|
if (data?.[0]?.["key"]) datasetsToWrite = data.map((d) => d.data);
|
|
245
217
|
else datasetsToWrite = [data];
|
|
246
218
|
for (const writeData of datasetsToWrite) {
|
|
247
|
-
if
|
|
219
|
+
//if it's a lookup table, it's always a CSV
|
|
220
|
+
if (format === "csv" || path.includes("-LOOKUP.csv")) {
|
|
248
221
|
log(`writing ${path}`);
|
|
249
|
-
const columns = getUniqueKeys(writeData);
|
|
222
|
+
const columns = u.getUniqueKeys(writeData);
|
|
250
223
|
//papa parse needs nested JSON stringified
|
|
251
224
|
writeData.forEach((e) => {
|
|
252
225
|
for (const key in e) {
|
|
@@ -348,6 +321,7 @@ function makeProfile(props, defaults) {
|
|
|
348
321
|
}
|
|
349
322
|
|
|
350
323
|
function makeSCD(props, distinct_id, mutations, $created) {
|
|
324
|
+
if (JSON.stringify(props) === "{}") return [];
|
|
351
325
|
const scdEntries = [];
|
|
352
326
|
let lastInserted = dayjs($created);
|
|
353
327
|
const deltaDays = dayjs().diff(lastInserted, "day");
|
|
@@ -356,12 +330,12 @@ function makeSCD(props, distinct_id, mutations, $created) {
|
|
|
356
330
|
if (lastInserted.isAfter(dayjs())) break;
|
|
357
331
|
const scd = makeProfile(props, { distinct_id });
|
|
358
332
|
scd.startTime = lastInserted.toISOString();
|
|
359
|
-
lastInserted = lastInserted.add(integer(1, 1000), "seconds");
|
|
333
|
+
lastInserted = lastInserted.add(u.integer(1, 1000), "seconds");
|
|
360
334
|
scd.insertTime = lastInserted.toISOString();
|
|
361
335
|
scdEntries.push({ ...scd });
|
|
362
336
|
lastInserted = lastInserted
|
|
363
|
-
.add(integer(0, deltaDays), "day")
|
|
364
|
-
.subtract(integer(1, 1000), "seconds");
|
|
337
|
+
.add(u.integer(0, deltaDays), "day")
|
|
338
|
+
.subtract(u.integer(1, 1000), "seconds");
|
|
365
339
|
}
|
|
366
340
|
|
|
367
341
|
return scdEntries;
|
|
@@ -394,11 +368,11 @@ function makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, events,
|
|
|
394
368
|
|
|
395
369
|
//event time
|
|
396
370
|
if (isFirstEvent) event.time = dayjs.unix(earliestTime).toISOString();
|
|
397
|
-
if (!isFirstEvent) event.time = AKsTimeSoup(earliestTime, NOW
|
|
371
|
+
if (!isFirstEvent) event.time = AKsTimeSoup(earliestTime, NOW);
|
|
398
372
|
|
|
399
373
|
// anonymous and session ids
|
|
400
|
-
if (global?.
|
|
401
|
-
if (global?.
|
|
374
|
+
if (global.MP_SIMULATION_CONFIG?.anonIds) event.$device_id = chance.pickone(anonymousIds);
|
|
375
|
+
if (global.MP_SIMULATION_CONFIG?.sessionIds) event.$session_id = chance.pickone(sessionIds);
|
|
402
376
|
|
|
403
377
|
//sometimes have a $user_id
|
|
404
378
|
if (!isFirstEvent && chance.bool({ likelihood: 42 })) event.$user_id = distinct_id;
|
|
@@ -411,7 +385,7 @@ function makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, events,
|
|
|
411
385
|
//iterate through custom properties
|
|
412
386
|
for (const key in props) {
|
|
413
387
|
try {
|
|
414
|
-
event[key] = choose(props[key]);
|
|
388
|
+
event[key] = u.choose(props[key]);
|
|
415
389
|
} catch (e) {
|
|
416
390
|
console.error(`error with ${key} in ${chosenEvent.event} event`, e);
|
|
417
391
|
debugger;
|
|
@@ -423,7 +397,7 @@ function makeEvent(distinct_id, anonymousIds, sessionIds, earliestTime, events,
|
|
|
423
397
|
const groupKey = groupPair[0];
|
|
424
398
|
const groupCardinality = groupPair[1];
|
|
425
399
|
|
|
426
|
-
event[groupKey] = pick(weightedRange(1, groupCardinality))
|
|
400
|
+
event[groupKey] = u.pick(u.weightedRange(1, groupCardinality));
|
|
427
401
|
}
|
|
428
402
|
|
|
429
403
|
//make $insert_id
|
|
@@ -441,9 +415,9 @@ function buildFileNames(config) {
|
|
|
441
415
|
if (config.writeToDisk) writeDir = mkdir("./data");
|
|
442
416
|
|
|
443
417
|
const writePaths = {
|
|
444
|
-
eventFiles: [path.join(writeDir,
|
|
445
|
-
userFiles: [path.join(writeDir,
|
|
446
|
-
scdFiles: [path.join(writeDir,
|
|
418
|
+
eventFiles: [path.join(writeDir, `${simName}-EVENTS.${extension}`)],
|
|
419
|
+
userFiles: [path.join(writeDir, `${simName}-USERS.${extension}`)],
|
|
420
|
+
scdFiles: [path.join(writeDir, `${simName}-SCD.${extension}`)],
|
|
447
421
|
groupFiles: [],
|
|
448
422
|
lookupFiles: [],
|
|
449
423
|
folder: writeDir,
|
|
@@ -452,90 +426,28 @@ function buildFileNames(config) {
|
|
|
452
426
|
for (const groupPair of groupKeys) {
|
|
453
427
|
const groupKey = groupPair[0];
|
|
454
428
|
writePaths.groupFiles.push(
|
|
455
|
-
path.join(writeDir,
|
|
429
|
+
path.join(writeDir, `${simName}-${groupKey}-GROUP.${extension}`)
|
|
456
430
|
);
|
|
457
431
|
}
|
|
458
432
|
|
|
459
433
|
for (const lookupTable of lookupTables) {
|
|
460
434
|
const { key } = lookupTable;
|
|
461
435
|
writePaths.lookupFiles.push(
|
|
462
|
-
|
|
436
|
+
//lookups are always CSVs
|
|
437
|
+
path.join(writeDir, `${simName}-${key}-LOOKUP.csv`)
|
|
463
438
|
);
|
|
464
439
|
}
|
|
465
440
|
|
|
466
441
|
return writePaths;
|
|
467
442
|
}
|
|
468
443
|
|
|
469
|
-
/**
|
|
470
|
-
* essentially, a timestamp generator with a twist
|
|
471
|
-
* @param {number} earliestTime - The earliest timestamp in Unix format.
|
|
472
|
-
* @param {number} latestTime - The latest timestamp in Unix format.
|
|
473
|
-
* @param {Array} peakDays - Array of Unix timestamps representing the start of peak days.
|
|
474
|
-
* @returns {number} - The generated event timestamp in Unix format.
|
|
475
|
-
*/
|
|
476
|
-
function AKsTimeSoup(earliestTime, latestTime = NOW, peakDays = PEAK_DAYS) {
|
|
477
|
-
let chosenTime;
|
|
478
|
-
let eventTime;
|
|
479
|
-
let validTime = false;
|
|
480
|
-
|
|
481
|
-
if (typeof earliestTime !== "number") {
|
|
482
|
-
if (parseInt(earliestTime) > 0) earliestTime = parseInt(earliestTime);
|
|
483
|
-
if (dayjs(earliestTime).isValid()) earliestTime = dayjs(earliestTime).unix();
|
|
484
|
-
}
|
|
485
|
-
|
|
486
|
-
while (!validTime) {
|
|
487
|
-
|
|
488
|
-
// Define business hours
|
|
489
|
-
const peakStartHour = 4; // 4 AM
|
|
490
|
-
const peakEndHour = 23; // 11 PM
|
|
491
|
-
const likelihoodOfPeakDay = chance.integer({ min: integer(5, 42), max: integer(43, 69) }); // Randomize likelihood with CHAOS!~~
|
|
492
|
-
|
|
493
|
-
// Select a day, with a preference for peak days
|
|
494
|
-
let selectedDay;
|
|
495
|
-
if (chance.bool({ likelihood: likelihoodOfPeakDay })) { // Randomized likelihood to pick a peak day
|
|
496
|
-
selectedDay = peakDays.length > 0 ? chance.pickone(peakDays) : integer(earliestTime, latestTime);
|
|
497
|
-
} else {
|
|
498
|
-
// Introduce minor peaks by allowing some events to still occur during business hours
|
|
499
|
-
selectedDay = chance.bool({ likelihood: integer(1, 42) })
|
|
500
|
-
? chance.pickone(peakDays)
|
|
501
|
-
: integer(earliestTime, latestTime);
|
|
502
|
-
}
|
|
503
|
-
|
|
504
|
-
// Normalize selectedDay to the start of the day
|
|
505
|
-
selectedDay = dayjs.unix(selectedDay).startOf('day').unix();
|
|
506
|
-
|
|
507
|
-
// Generate a random time within business hours with a higher concentration in the middle of the period
|
|
508
|
-
const businessStart = dayjs.unix(selectedDay).hour(peakStartHour).minute(0).second(0).unix();
|
|
509
|
-
const businessEnd = dayjs.unix(selectedDay).hour(peakEndHour).minute(0).second(0).unix();
|
|
510
|
-
|
|
511
|
-
if (selectedDay === peakDays[0]) {
|
|
512
|
-
// Use a skewed distribution for peak days
|
|
513
|
-
eventTime = chance.normal({ mean: (businessEnd + businessStart) / integer(1, 4), dev: (businessEnd - businessStart) / integer(2, 8) });
|
|
514
|
-
} else {
|
|
515
|
-
// For non-peak days, use a uniform distribution to add noise
|
|
516
|
-
eventTime = integer(integer(businessStart, businessEnd), integer(businessStart, businessEnd));
|
|
517
|
-
}
|
|
518
|
-
|
|
519
|
-
// usually, ensure the event time is within business hours
|
|
520
|
-
if (chance.bool({ likelihood: 42 })) eventTime = Math.min(Math.max(eventTime, businessStart), businessEnd);
|
|
521
|
-
|
|
522
|
-
if (eventTime > 0) validTime = true;
|
|
523
|
-
const parsedTime = dayjs.unix(eventTime).toISOString();
|
|
524
|
-
if (!parsedTime.startsWith('20')) validTime = false;
|
|
525
|
-
|
|
526
|
-
}
|
|
527
|
-
chosenTime = dayjs.unix(eventTime).toISOString();
|
|
528
|
-
if (eventTime < 0) debugger;
|
|
529
|
-
if (!chosenTime.startsWith('20')) debugger;
|
|
530
|
-
return chosenTime;
|
|
531
|
-
}
|
|
532
444
|
|
|
533
445
|
|
|
534
446
|
|
|
535
447
|
// this is for CLI
|
|
536
448
|
if (require.main === module) {
|
|
537
449
|
const args = cliParams();
|
|
538
|
-
const { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk } = args;
|
|
450
|
+
const { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false } = args;
|
|
539
451
|
const suppliedConfig = args._[0];
|
|
540
452
|
|
|
541
453
|
//if the user specifics an separate config file
|
|
@@ -543,9 +455,18 @@ if (require.main === module) {
|
|
|
543
455
|
if (suppliedConfig) {
|
|
544
456
|
log(`using ${suppliedConfig} for data\n`);
|
|
545
457
|
config = require(path.resolve(suppliedConfig));
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
|
|
458
|
+
}
|
|
459
|
+
else {
|
|
460
|
+
if (complex) {
|
|
461
|
+
log(`... using default COMPLEX configuration [everything] ...\n`);
|
|
462
|
+
log(`... for more simple data, don't use the --complex flag ...\n`);
|
|
463
|
+
config = require(path.resolve("./models/complex.js"));
|
|
464
|
+
}
|
|
465
|
+
else {
|
|
466
|
+
log(`... using default SIMPLE configuration [events + users] ...\n`);
|
|
467
|
+
log(`... for more complex data, use the --complex flag ...\n`);
|
|
468
|
+
config = require(path.resolve("./models/simple.js"));
|
|
469
|
+
}
|
|
549
470
|
}
|
|
550
471
|
|
|
551
472
|
//override config with cli params
|
|
@@ -592,9 +513,10 @@ if (require.main === module) {
|
|
|
592
513
|
})
|
|
593
514
|
.finally(() => {
|
|
594
515
|
log("have a wonderful day :)");
|
|
595
|
-
openFinder(path.resolve("./data"));
|
|
516
|
+
u.openFinder(path.resolve("./data"));
|
|
596
517
|
});
|
|
597
518
|
} else {
|
|
519
|
+
main.utils = { ...u };
|
|
598
520
|
main.timeSoup = AKsTimeSoup;
|
|
599
521
|
module.exports = main;
|
|
600
522
|
}
|
|
@@ -8,12 +8,12 @@
|
|
|
8
8
|
|
|
9
9
|
const Chance = require('chance');
|
|
10
10
|
const chance = new Chance();
|
|
11
|
-
const { weightedRange, makeProducts, date,
|
|
11
|
+
const { weightedRange, makeProducts, date, makeHashTags } = require('../utils.js');
|
|
12
12
|
|
|
13
|
-
/** @type {import('
|
|
13
|
+
/** @type {import('../types.js').Config} */
|
|
14
14
|
const config = {
|
|
15
15
|
token: "",
|
|
16
|
-
seed: "
|
|
16
|
+
seed: "quite complex",
|
|
17
17
|
numDays: 30, //how many days worth of data
|
|
18
18
|
numEvents: 100000, //how many events
|
|
19
19
|
numUsers: 1000, //how many users
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
const Chance = require('chance');
|
|
2
|
+
const chance = new Chance();
|
|
3
|
+
|
|
4
|
+
const config = {
|
|
5
|
+
token: "",
|
|
6
|
+
secret: "",
|
|
7
|
+
seed: "get nesty!",
|
|
8
|
+
events: ['watch video', 'upload video', 'like video', 'dislike video', 'subscribe'],
|
|
9
|
+
eventProperties: {
|
|
10
|
+
videoMeta: generateVideoMeta
|
|
11
|
+
},
|
|
12
|
+
userProperties: {
|
|
13
|
+
userMeta: generateUserMeta
|
|
14
|
+
|
|
15
|
+
},
|
|
16
|
+
|
|
17
|
+
groupKeys: [],
|
|
18
|
+
groupProperties: {}
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
let formats = ['2160p', '1440p', '1080p', '720p', '480p', '360p', '240p'];
|
|
22
|
+
let ratios = ['4:3', '16:10', '16:9'];
|
|
23
|
+
let containers = ["WEBM", ["MPG", "MP2", "MPEG"], ["MP4", "M4P", "M4V"], ["AVI", "WMV"], ["MOV", "QT"], ["FLV", "SWF"], "AVCHD"];
|
|
24
|
+
let hashtags = ["#AK", "#bitcoin", "#cureForMiley", "#faceValue", "#blm", "#fwiw", "#inappropriateFuneralSongs", "#jurassicPork", "#lolCats", "#wheatForSheep", "#momTexts", "#myWeirdGymStory", "#poppy", "#resist", "#tbt", "#wilson", "#worstGiftEver", "#yolo", "#phish", "#crypto", "#memes", "#wrongMovie", "#careerEndingTwitterTypos", "#twoThingsThatDontMix"];
|
|
25
|
+
let platforms = ["Web", "Mobile Web", "Native (Android)", "Native (iOS)", "Native (Desktop)", "IoT"];
|
|
26
|
+
let plans = ['free', 'premium', 'casual', 'influencer'];
|
|
27
|
+
let categories = ["Product reviews video", "How-to videos", "Vlogs", "Gaming videos", "Comedy/skit videos", "Haul videos", "Memes/tags", "Favorites/best of", "Educational videos", "Unboxing videos", "Q&A videos", "Collection", "Prank videos"];
|
|
28
|
+
let marketingChannels = ["Organic", "Organic", "Organic", "Organic", "Instagram Ads", "Facebook Ads", "Google Ads", "Youtube Ads", "Instagram Post", "Instagram Post", "Facebook Post"];
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
function generateVideoMeta() {
|
|
32
|
+
|
|
33
|
+
let videoTemplate = {
|
|
34
|
+
videoFormatInfo: {
|
|
35
|
+
availableFormats: chance.pickset(formats, int(1, formats.length)),
|
|
36
|
+
availableAspectRatios: chance.pickset(ratios, int(1, ratios.length)),
|
|
37
|
+
availableContainerFormats: chance.pickset(containers, int(1, containers.length)),
|
|
38
|
+
observedLatencyTimestamps: chance.pickset([].range(1, 300000), int(1, 40))
|
|
39
|
+
},
|
|
40
|
+
videoStats: {
|
|
41
|
+
numerOfPlays: int(10, 10000000),
|
|
42
|
+
isReccommendedVideo: chance.bool(),
|
|
43
|
+
inPlaylists: chance.pickset(hashtags, int(1, hashtags.length)),
|
|
44
|
+
likers: chance.n(chance.guid, int(3, 100)),
|
|
45
|
+
dislikers: chance.n(chance.guid, int(3, 100)),
|
|
46
|
+
},
|
|
47
|
+
videoContentInfo: {
|
|
48
|
+
categories: {
|
|
49
|
+
hashtags: chance.pickset(hashtags, int(1, 10)),
|
|
50
|
+
category: chance.pickone(categories),
|
|
51
|
+
},
|
|
52
|
+
availibility: {
|
|
53
|
+
hasAdvertisements: chance.bool(),
|
|
54
|
+
canBeSeenOnPlans: chance.pickset(plans, int(1, plans.length)),
|
|
55
|
+
releaseInfo: {
|
|
56
|
+
isReleased: chance.bool({ likelihood: 90 }),
|
|
57
|
+
relaseDate: chance.date({ year: 2021 })
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
},
|
|
62
|
+
uploaderInfo: {
|
|
63
|
+
platform: chance.pickone(platforms),
|
|
64
|
+
uuid: chance.guid(),
|
|
65
|
+
plan: chance.pickone(plans)
|
|
66
|
+
},
|
|
67
|
+
viewerInfo: {
|
|
68
|
+
platform: chance.pickone(platforms),
|
|
69
|
+
uuid: chance.guid(),
|
|
70
|
+
plan: chance.pickone(plans)
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
|
|
74
|
+
return videoTemplate;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
function generateUserMeta() {
|
|
78
|
+
|
|
79
|
+
let userTemplate = {
|
|
80
|
+
favoriteNumber: chance.prime(),
|
|
81
|
+
attributionChain: chance.pickset(marketingChannels, int(1, 10)),
|
|
82
|
+
importantUserDates: {
|
|
83
|
+
firstSeenDate: chance.date({ year: 2010 }),
|
|
84
|
+
firstPurchaseDate: chance.date({ year: 2011 }),
|
|
85
|
+
firstSubscribeDate: chance.date({ year: 2011 }),
|
|
86
|
+
lastPurchaseDate: chance.date({ year: 2012 })
|
|
87
|
+
|
|
88
|
+
},
|
|
89
|
+
plan: chance.pickone(plans),
|
|
90
|
+
followers: chance.n(chance.guid, int(1, 100)),
|
|
91
|
+
follows: chance.n(chance.guid, int(1, 100))
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
return userTemplate;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
function int(min, max) {
|
|
99
|
+
return chance.integer({ min, max });
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
module.exports = config;
|
package/models/simple.js
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
const Chance = require('chance');
|
|
2
|
+
const chance = new Chance();
|
|
3
|
+
const dayjs = require("dayjs");
|
|
4
|
+
const utc = require("dayjs/plugin/utc");
|
|
5
|
+
dayjs.extend(utc);
|
|
6
|
+
const { uid, comma } = require('ak-tools');
|
|
7
|
+
const { weighList, weightedRange, date, integer } = require('../utils')
|
|
8
|
+
|
|
9
|
+
const itemCategories = ["Books", "Movies", "Music", "Games", "Electronics", "Computers", "Smart Home", "Home", "Garden", "Pet", "Beauty", "Health", "Toys", "Kids", "Baby", "Handmade", "Sports", "Outdoors", "Automotive", "Industrial", "Entertainment", "Art", "Food", "Appliances", "Office", "Wedding", "Software"];
|
|
10
|
+
|
|
11
|
+
const videoCategories = ["funny", "educational", "inspirational", "music", "news", "sports", "cooking", "DIY", "travel", "gaming"];
|
|
12
|
+
|
|
13
|
+
/** @type {import('../types').Config} */
|
|
14
|
+
const config = {
|
|
15
|
+
token: "",
|
|
16
|
+
seed: "simple is best",
|
|
17
|
+
numDays: 30, //how many days worth of data
|
|
18
|
+
numEvents: 50000, //how many events
|
|
19
|
+
numUsers: 500, //how many users
|
|
20
|
+
format: 'csv', //csv or json
|
|
21
|
+
region: "US",
|
|
22
|
+
anonIds: false, //if true, anonymousIds are created for each user
|
|
23
|
+
sessionIds: false, //if true, sessionIds are created for each user
|
|
24
|
+
|
|
25
|
+
events: [
|
|
26
|
+
{
|
|
27
|
+
"event": "checkout",
|
|
28
|
+
"weight": 2,
|
|
29
|
+
"properties": {
|
|
30
|
+
amount: weightedRange(5, 500, 1000, .25),
|
|
31
|
+
currency: ["USD", "CAD", "EUR", "BTC", "ETH", "JPY"],
|
|
32
|
+
coupon: ["none", "none", "none", "none", "10%OFF", "20%OFF", "10%OFF", "20%OFF", "30%OFF", "40%OFF", "50%OFF"],
|
|
33
|
+
numItems: weightedRange(1, 10),
|
|
34
|
+
|
|
35
|
+
}
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"event": "add to cart",
|
|
39
|
+
"weight": 4,
|
|
40
|
+
"properties": {
|
|
41
|
+
amount: weightedRange(5, 500, 1000, .25),
|
|
42
|
+
rating: weightedRange(1, 5),
|
|
43
|
+
reviews: weightedRange(0, 35),
|
|
44
|
+
isFeaturedItem: [true, false, false],
|
|
45
|
+
itemCategory: weighList(itemCategories, integer(0, 27)),
|
|
46
|
+
dateItemListed: date(30, true, 'YYYY-MM-DD'),
|
|
47
|
+
itemId: integer(1000, 9999),
|
|
48
|
+
}
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"event": "page view",
|
|
52
|
+
"weight": 10,
|
|
53
|
+
"properties": {
|
|
54
|
+
page: ["/", "/", "/help", "/account", "/watch", "/listen", "/product", "/people", "/peace"],
|
|
55
|
+
utm_source: ["$organic", "$organic", "$organic", "$organic", "google", "google", "google", "facebook", "facebook", "twitter", "linkedin"],
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"event": "watch video",
|
|
60
|
+
"weight": 8,
|
|
61
|
+
"properties": {
|
|
62
|
+
videoCategory: weighList(videoCategories, integer(0, 9)),
|
|
63
|
+
isFeaturedItem: [true, false, false],
|
|
64
|
+
watchTimeSec: weightedRange(10, 600, 1000, .25),
|
|
65
|
+
quality: ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"],
|
|
66
|
+
format: ["mp4", "avi", "mov", "mpg"],
|
|
67
|
+
uploader_id: chance.guid.bind(chance)
|
|
68
|
+
|
|
69
|
+
}
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
"event": "view item",
|
|
73
|
+
"weight": 8,
|
|
74
|
+
"properties": {
|
|
75
|
+
isFeaturedItem: [true, false, false],
|
|
76
|
+
itemCategory: weighList(itemCategories, integer(0, 27)),
|
|
77
|
+
dateItemListed: date(30, true, 'YYYY-MM-DD'),
|
|
78
|
+
itemId: integer(1000, 9999),
|
|
79
|
+
}
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"event": "save item",
|
|
83
|
+
"weight": 5,
|
|
84
|
+
"properties": {
|
|
85
|
+
isFeaturedItem: [true, false, false],
|
|
86
|
+
itemCategory: weighList(itemCategories, integer(0, 27)),
|
|
87
|
+
dateItemListed: date(30, true, 'YYYY-MM-DD'),
|
|
88
|
+
itemId: integer(1000, 9999),
|
|
89
|
+
}
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
"event": "sign up",
|
|
93
|
+
"isFirstEvent": true,
|
|
94
|
+
"weight": 0,
|
|
95
|
+
"properties": {
|
|
96
|
+
variants: ["A", "B", "C", "Control"],
|
|
97
|
+
flows: ["new", "existing", "loyal", "churned"],
|
|
98
|
+
flags: ["on", "off"],
|
|
99
|
+
experiment_ids: ["1234", "5678", "9012", "3456", "7890"],
|
|
100
|
+
multiVariate: [true, false]
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
],
|
|
104
|
+
superProps: {
|
|
105
|
+
platform: ["web", "mobile", "web", "mobile", "web", "web", "kiosk", "smartTV"],
|
|
106
|
+
currentTheme: ["light", "dark", "custom", "light", "dark"],
|
|
107
|
+
// emotions: generateEmoji(),
|
|
108
|
+
|
|
109
|
+
},
|
|
110
|
+
/*
|
|
111
|
+
user properties work the same as event properties
|
|
112
|
+
each key should be an array or function reference
|
|
113
|
+
*/
|
|
114
|
+
userProps: {
|
|
115
|
+
title: chance.profession.bind(chance),
|
|
116
|
+
luckyNumber: weightedRange(42, 420),
|
|
117
|
+
spiritAnimal: chance.animal.bind(chance)
|
|
118
|
+
},
|
|
119
|
+
|
|
120
|
+
scdProps: { },
|
|
121
|
+
|
|
122
|
+
/*
|
|
123
|
+
for group analytics keys, we need an array of arrays [[],[],[]]
|
|
124
|
+
each pair represents a group_key and the number of profiles for that key
|
|
125
|
+
*/
|
|
126
|
+
groupKeys: [],
|
|
127
|
+
groupProps: {},
|
|
128
|
+
lookupTables: [],
|
|
129
|
+
};
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
module.exports = config;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "make-mp-data",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.01",
|
|
4
4
|
"description": "builds all mixpanel primitives for a given project",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "types.d.ts",
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
"prune": "rm ./data/*",
|
|
10
10
|
"go": "sh ./scripts/go.sh",
|
|
11
11
|
"post": "npm publish",
|
|
12
|
-
"test": "jest",
|
|
12
|
+
"test": "jest --runInBand",
|
|
13
13
|
"deps": "sh ./scripts/deps.sh"
|
|
14
14
|
},
|
|
15
15
|
"repository": {
|