make-mp-data 3.0.6 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dungeons/capstone/capstone-ic3.js +291 -0
- package/dungeons/capstone/capstone-ic4.js +598 -0
- package/dungeons/capstone/capstone-ic5.js +668 -0
- package/dungeons/capstone/generate-product-lookup.js +309 -0
- package/dungeons/complex.js +428 -0
- package/entry.js +1 -1
- package/index.js +7 -2
- package/lib/cli/cli.js +3 -11
- package/lib/core/config-validator.js +7 -10
- package/lib/orchestrators/mixpanel-sender.js +47 -36
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -13,6 +13,30 @@ under the hood, `make-mp-data` is modeling data adherent to match [Mixpanel's da
|
|
|
13
13
|
|
|
14
14
|
## 🚀 Quick Start
|
|
15
15
|
|
|
16
|
+
### Canonical Usage (v3.1.0+)
|
|
17
|
+
|
|
18
|
+
Two paths are guaranteed to "just work":
|
|
19
|
+
|
|
20
|
+
**1. As a CLI — send data to Mixpanel with one flag:**
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
npx make-mp-data --token YOUR_PROJECT_TOKEN
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
**2. As an ES module — bare `await` call:**
|
|
27
|
+
|
|
28
|
+
```javascript
|
|
29
|
+
import makeMpData from 'make-mp-data';
|
|
30
|
+
|
|
31
|
+
// Zero-config: generates events + users, returns them in memory
|
|
32
|
+
const result = await makeMpData({});
|
|
33
|
+
|
|
34
|
+
// Canonical "just send it": pass a token, data ships to Mixpanel
|
|
35
|
+
await makeMpData({ token: 'YOUR_PROJECT_TOKEN' });
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Both paths return a `Result` object with `eventData`, `userProfilesData`, `importResults`, `files`, `eventCount`, `userCount`, and timing info.
|
|
39
|
+
|
|
16
40
|
### Basic Usage
|
|
17
41
|
|
|
18
42
|
Generate events and users, and write them to CSV files:
|
|
@@ -87,6 +111,16 @@ Here's a breakdown of the CLI options you can use with `make-mp-data`:
|
|
|
87
111
|
- `--complex`: create a complex set of models including groups, SCD, and lookup tables.
|
|
88
112
|
- `--simple`: create a simple dataset including events and users
|
|
89
113
|
|
|
114
|
+
### Custom Dungeon Configs
|
|
115
|
+
|
|
116
|
+
Pass a path to your own dungeon `.js` file. CLI flags override values from the dungeon — your file provides defaults, the CLI tunes them:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
npx make-mp-data ./my-dungeon.js --numUsers 500 --token YOUR_TOKEN
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
CLI flag defaults (like `region`, `concurrency`) **do not** clobber explicit values in your dungeon — only flags you actually pass take effect.
|
|
123
|
+
|
|
90
124
|
## ⏱️ TimeSoup — Realistic Time Distributions
|
|
91
125
|
|
|
92
126
|
TimeSoup controls how events are distributed across time. Out of the box, it produces realistic day-of-week and hour-of-day patterns derived from real Mixpanel data (weekday-heavy, Saturday valley, morning peak).
|
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ═══════════════════════════════════════════════════════════════
|
|
3
|
+
* DATASET OVERVIEW
|
|
4
|
+
* ═══════════════════════════════════════════════════════════════
|
|
5
|
+
*
|
|
6
|
+
* IC3 Capstone — e-commerce dataset for Mixpanel certification.
|
|
7
|
+
* - 25,000 users over 180 days, ~5M events
|
|
8
|
+
* - Events: checkout (array-of-object cart), add to cart, view/save items
|
|
9
|
+
* - Nested product arrays with product_id lookups
|
|
10
|
+
* - Location, browser, session tracking enabled
|
|
11
|
+
*
|
|
12
|
+
* ═══════════════════════════════════════════════════════════════
|
|
13
|
+
* ANALYTICS HOOKS (1 pattern)
|
|
14
|
+
* ═══════════════════════════════════════════════════════════════
|
|
15
|
+
*
|
|
16
|
+
* 1. EVENT DUPLICATION (everything hook)
|
|
17
|
+
* Every user gets ~3-7 random events duplicated, simulating
|
|
18
|
+
* real-world duplicate event ingestion that students must
|
|
19
|
+
* identify and handle in their analysis.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import Chance from 'chance';
|
|
23
|
+
let chance = new Chance();
|
|
24
|
+
import dayjs from "dayjs";
|
|
25
|
+
import utc from "dayjs/plugin/utc.js";
|
|
26
|
+
dayjs.extend(utc);
|
|
27
|
+
import { uid, comma } from 'ak-tools';
|
|
28
|
+
import { pickAWinner, weighNumRange, date, integer, weighChoices } from "../brain/utils/utils.js";
|
|
29
|
+
|
|
30
|
+
const videoCategories = ["funny", "educational", "inspirational", "music", "news", "sports", "cooking", "DIY", "travel", "gaming"];
|
|
31
|
+
const spiritAnimals = ["duck", "dog", "otter", "penguin", "cat", "elephant", "lion", "cheetah", "giraffe", "zebra", "rhino", "hippo", "whale", "dolphin", "shark", "octopus", "squid", "jellyfish", "starfish", "seahorse", "crab", "lobster", "shrimp", "clam", "snail", "slug", "butterfly", "moth", "bee", "wasp", "ant", "beetle", "ladybug", "caterpillar", "centipede", "millipede", "scorpion", "spider", "tarantula", "tick", "mite", "mosquito", "fly", "dragonfly", "damselfly", "grasshopper", "cricket", "locust", "mantis", "cockroach", "termite", "praying mantis", "walking stick", "stick bug", "leaf insect", "lacewing", "aphid", "cicada", "thrips", "psyllid", "scale insect", "whitefly", "mealybug", "planthopper", "leafhopper", "treehopper", "flea", "louse", "bedbug", "flea beetle", "weevil", "longhorn beetle", "leaf beetle", "tiger beetle", "ground beetle", "lady beetle", "firefly", "click beetle", "rove beetle", "scarab beetle", "dung beetle", "stag beetle", "rhinoceros beetle", "hercules beetle", "goliath beetle", "jewel beetle", "tortoise beetle"];
|
|
32
|
+
|
|
33
|
+
/** @type {import('../types.js').Dungeon} */
|
|
34
|
+
const config = {
|
|
35
|
+
token: "",
|
|
36
|
+
seed: "IC3 baby!",
|
|
37
|
+
name: "ic3-capstone",
|
|
38
|
+
numDays: 180, //how many days' worth of data
|
|
39
|
+
numEvents: 5_000_000, //how many events
|
|
40
|
+
numUsers: 25_000, //how many users
|
|
41
|
+
format: 'json', //csv or json
|
|
42
|
+
region: "US",
|
|
43
|
+
hasAnonIds: true, //if true, anonymousIds are created for each user
|
|
44
|
+
hasSessionIds: true, //if true, session ids are created for each user
|
|
45
|
+
hasAdSpend: false,
|
|
46
|
+
makeChart: false,
|
|
47
|
+
hasLocation: true,
|
|
48
|
+
hasAndroidDevices: false,
|
|
49
|
+
hasIOSDevices: false,
|
|
50
|
+
hasDesktopDevices: true,
|
|
51
|
+
hasBrowser: true,
|
|
52
|
+
hasCampaigns: false,
|
|
53
|
+
isAnonymous: false,
|
|
54
|
+
alsoInferFunnels: false,
|
|
55
|
+
concurrency: 1,
|
|
56
|
+
batchSize: 250_000,
|
|
57
|
+
writeToDisk: true,
|
|
58
|
+
events: [
|
|
59
|
+
{
|
|
60
|
+
event: "checkout",
|
|
61
|
+
weight: 2,
|
|
62
|
+
properties: {
|
|
63
|
+
currency: pickAWinner(["USD", "CAD", "EUR", "BTC", "ETH", "JPY"], 0),
|
|
64
|
+
coupon: weighChoices(["none", "none", "none", "none", "10%OFF", "20%OFF", "10%OFF", "20%OFF", "30%OFF", "40%OFF", "50%OFF"]),
|
|
65
|
+
cart: makeProducts()
|
|
66
|
+
}
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
event: "add to cart",
|
|
70
|
+
weight: 4,
|
|
71
|
+
properties: {
|
|
72
|
+
item: makeProducts(1),
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
event: "view item",
|
|
77
|
+
weight: 8,
|
|
78
|
+
properties: {
|
|
79
|
+
item: makeProducts(1)
|
|
80
|
+
}
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
event: "save item",
|
|
84
|
+
weight: 5,
|
|
85
|
+
properties: {
|
|
86
|
+
item: makeProducts(1),
|
|
87
|
+
}
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
event: "page view",
|
|
91
|
+
weight: 10,
|
|
92
|
+
properties: {
|
|
93
|
+
page: ["/", "/help", "/account", "/watch", "/listen", "/product", "/people", "/peace"],
|
|
94
|
+
}
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
event: "watch video",
|
|
98
|
+
weight: 8,
|
|
99
|
+
properties: {
|
|
100
|
+
watchTimeSec: weighNumRange(10, 600, .25),
|
|
101
|
+
}
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
event: "like video",
|
|
105
|
+
weight: 6,
|
|
106
|
+
properties: {
|
|
107
|
+
|
|
108
|
+
}
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
event: "dislike video",
|
|
112
|
+
weight: 4,
|
|
113
|
+
properties: {
|
|
114
|
+
|
|
115
|
+
}
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
event: "sign up",
|
|
119
|
+
weight: 1,
|
|
120
|
+
isFirstEvent: true,
|
|
121
|
+
properties: {
|
|
122
|
+
signupMethod: pickAWinner(["email", "google", "facebook", "twitter", "linkedin", "github"]),
|
|
123
|
+
referral: weighChoices(["none", "none", "none", "friend", "ad", "ad", "ad", "friend", "friend", "friend", "friend"]),
|
|
124
|
+
}
|
|
125
|
+
},
|
|
126
|
+
|
|
127
|
+
],
|
|
128
|
+
funnels: [
|
|
129
|
+
{
|
|
130
|
+
sequence: ["page view", "view item", "save item", "page view", "sign up"],
|
|
131
|
+
conversionRate: 50,
|
|
132
|
+
order: "first-and-last-fixed",
|
|
133
|
+
weight: 1,
|
|
134
|
+
isFirstFunnel: true,
|
|
135
|
+
timeToConvert: 2,
|
|
136
|
+
experiment: false,
|
|
137
|
+
name: "Signup Flow"
|
|
138
|
+
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
sequence: ["watch video", "like video", "watch video", "like video"],
|
|
142
|
+
name: "Video Likes",
|
|
143
|
+
conversionRate: 60,
|
|
144
|
+
props: {
|
|
145
|
+
videoCategory: videoCategories,
|
|
146
|
+
quality: ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"],
|
|
147
|
+
format: ["mp4", "avi", "mov", "mpg"],
|
|
148
|
+
uploader_id: chance.guid.bind(chance)
|
|
149
|
+
}
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
name: "Video Dislikes",
|
|
153
|
+
sequence: ["watch video", "dislike video", "watch video", "dislike video"],
|
|
154
|
+
conversionRate: 20,
|
|
155
|
+
props: {
|
|
156
|
+
videoCategory: videoCategories,
|
|
157
|
+
quality: ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"],
|
|
158
|
+
format: ["mp4", "avi", "mov", "mpg"],
|
|
159
|
+
uploader_id: chance.guid.bind(chance)
|
|
160
|
+
}
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
name: "eCommerce Purchase",
|
|
164
|
+
sequence: ["view item", "view item", "add to cart", "view item", "add to cart", "checkout"],
|
|
165
|
+
conversionRate: 15,
|
|
166
|
+
requireRepeats: true,
|
|
167
|
+
weight: 10,
|
|
168
|
+
order: "last-fixed",
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
],
|
|
172
|
+
superProps: {
|
|
173
|
+
theme: pickAWinner(["light", "dark", "custom", "light", "dark"]),
|
|
174
|
+
},
|
|
175
|
+
/*
|
|
176
|
+
user properties work the same as event properties
|
|
177
|
+
each key should be an array or function reference
|
|
178
|
+
*/
|
|
179
|
+
userProps: {
|
|
180
|
+
title: chance.profession.bind(chance),
|
|
181
|
+
luckyNumber: weighNumRange(1, 500, .3),
|
|
182
|
+
spiritAnimal: spiritAnimals
|
|
183
|
+
},
|
|
184
|
+
scdProps: {},
|
|
185
|
+
mirrorProps: {},
|
|
186
|
+
/*
|
|
187
|
+
for group analytics keys, we need an array of arrays [[],[],[]]
|
|
188
|
+
each pair represents a group_key and the number of profiles for that key
|
|
189
|
+
*/
|
|
190
|
+
groupKeys: [],
|
|
191
|
+
groupProps: {},
|
|
192
|
+
lookupTables: [{
|
|
193
|
+
key: "luckyNumber",
|
|
194
|
+
entries: 500,
|
|
195
|
+
attributes: {
|
|
196
|
+
present: ["happy", "sad", "angry", "confused", "excited", "bored", "curious", "anxious", "relaxed", "stressed"],
|
|
197
|
+
past: ["joyful", "melancholy", "furious", "puzzled", "thrilled", "weary", "inquiring", "nervous", "calm", "tense"],
|
|
198
|
+
future: ["hopeful", "dreadful", "optimistic", "skeptical", "eager", "lethargic", "inquiring", "apprehensive", "confident", "uneasy"],
|
|
199
|
+
weather: ["sunny", "rainy", "cloudy", "stormy", "snowy", "windy", "foggy", "humid", "dry", "chilly"],
|
|
200
|
+
temperature: weighNumRange(-10, 100)
|
|
201
|
+
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
}],
|
|
205
|
+
hook: function (record, type, meta) {
|
|
206
|
+
|
|
207
|
+
const NOW = dayjs();
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
if (type === "event") {
|
|
211
|
+
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
if (type === "everything") {
|
|
215
|
+
// every users has ~5 duplicate events
|
|
216
|
+
let numDupes = integer(3, 7);
|
|
217
|
+
if (record.length < numDupes) numDupes = record.length - 1;
|
|
218
|
+
// pick random events to duplicate
|
|
219
|
+
const dupesIndexes = [];
|
|
220
|
+
while (dupesIndexes.length < numDupes) {
|
|
221
|
+
const idx = integer(0, record.length - 1);
|
|
222
|
+
if (!dupesIndexes.includes(idx)) {
|
|
223
|
+
dupesIndexes.push(idx);
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
// duplicate those events
|
|
228
|
+
dupesIndexes.forEach(idx => {
|
|
229
|
+
const eventToDup = record[idx];
|
|
230
|
+
const newEvent = { ...eventToDup };
|
|
231
|
+
record.push(newEvent);
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
return record;
|
|
240
|
+
}
|
|
241
|
+
};
|
|
242
|
+
|
|
243
|
+
/**
 * Creates a property generator that fabricates a random list of product
 * objects (used for the `cart` and `item` event properties).
 *
 * Calling the returned generator builds between 1 and `maxItems` products
 * (bounds as interpreted by `integer`) and returns a zero-argument function
 * that yields the list wrapped in a one-element array, i.e. `[[...items]]`.
 * NOTE(review): the extra function/array wrapping is presumably there so the
 * consumer treats the list as one value rather than a set of choices — confirm
 * against the property resolver before changing the shape.
 *
 * @param {number} [maxItems=5] - upper bound handed to `integer` for the item count.
 * @returns {function(): function(): Array<Array<Object>>} the generator described above.
 */
function makeProducts(maxItems = 5) {
	// The vocabulary never changes, so build it once per generator rather than
	// once per generated event.
	const categories = ["electronics", "books", "clothing", "home", "garden", "toys", "sports", "automotive", "beauty", "health", "grocery", "jewelry", "shoes", "tools", "office supplies"];
	const descriptors = ["brand new", "open box", "refurbished", "used", "like new", "vintage", "antique", "collectible"];
	const suffix = ["item", "product", "good", "merchandise", "thing", "object", "widget", "gadget", "device", "apparatus", "contraption", "instrument", "tool", "implement", "utensil", "appliance", "machine", "equipment", "gear", "kit", "set", "package"];
	const assetPreview = ['.png', '.jpg', '.jpeg', '.heic', '.mp4', '.mov', '.avi'];
	const featuredOdds = [true, false, false];

	// "like new" -> "like-new"
	const toSlugPart = (text) => text.replace(/\s+/g, '-').toLowerCase();

	return function () {
		const data = [];
		const numOfItems = integer(1, maxItems);

		for (let i = 0; i < numOfItems; i++) {
			// Keep the draw order stable (category, descriptor, suffix, asset,
			// price, quantity, featured) so any seeded random stream produces
			// the same products as before.
			const category = chance.pickone(categories);
			const descriptor = chance.pickone(descriptors);
			const suffixWord = chance.pickone(suffix);
			const slug = `${toSlugPart(descriptor)}-${toSlugPart(suffixWord)}`;
			const asset = chance.pickone(assetPreview);

			const price = integer(1, 100);
			const quantity = integer(1, 5);

			// Items carry no product_id / sku in this dataset.
			data.push({
				amount: price,
				quantity,
				total_value: price * quantity,
				featured: chance.pickone(featuredOdds),
				category,
				descriptor,
				slug,
				assetPreview: `https://example.com/assets/${slug}${asset}`,
				assetType: asset
			});
		}

		return () => [data];
	};
}
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
/**
 * Weighted coin toss backed by the module's `chance` instance.
 *
 * @param {number} [likelihood=50] - forwarded unchanged as the `likelihood`
 *   option of `chance.bool`.
 * @returns {boolean} whatever `chance.bool` produces for that likelihood.
 */
function flip(likelihood = 50) {
	const options = { likelihood };
	return chance.bool(options);
}
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
export default config;
|