make-mp-data 3.0.6 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -13,6 +13,30 @@ under the hood, `make-mp-data` is modeling data adherent to match [Mixpanel's da
13
13
 
14
14
  ## 🚀 Quick Start
15
15
 
16
+ ### Canonical Usage (v3.1.0+)
17
+
18
+ Two paths are guaranteed to "just work":
19
+
20
+ **1. As a CLI — send data to Mixpanel with one flag:**
21
+
22
+ ```bash
23
+ npx make-mp-data --token YOUR_PROJECT_TOKEN
24
+ ```
25
+
26
+ **2. As an ES module — bare `await` call:**
27
+
28
+ ```javascript
29
+ import makeMpData from 'make-mp-data';
30
+
31
+ // Zero-config: generates events + users, returns them in memory
32
+ const result = await makeMpData({});
33
+
34
+ // Canonical "just send it": pass a token, data ships to Mixpanel
35
+ await makeMpData({ token: 'YOUR_PROJECT_TOKEN' });
36
+ ```
37
+
38
+ Both paths return a `Result` object with `eventData`, `userProfilesData`, `importResults`, `files`, `eventCount`, `userCount`, and timing info.
39
+
16
40
  ### Basic Usage
17
41
 
18
42
  Generate events and users, and write them to CSV files:
@@ -87,6 +111,16 @@ Here's a breakdown of the CLI options you can use with `make-mp-data`:
87
111
  - `--complex`: create a complex set of models, including groups, SCD, and lookup tables.
88
112
  - `--simple`: create a simple dataset including events and users.
89
113
 
114
+ ### Custom Dungeon Configs
115
+
116
+ Pass a path to your own dungeon `.js` file. CLI flags override values from the dungeon — your file provides defaults, the CLI tunes them:
117
+
118
+ ```bash
119
+ npx make-mp-data ./my-dungeon.js --numUsers 500 --token YOUR_TOKEN
120
+ ```
121
+
122
+ CLI flag defaults (like `region`, `concurrency`) **do not** clobber explicit values in your dungeon — only flags you actually pass take effect.
123
+
90
124
  ## ⏱️ TimeSoup — Realistic Time Distributions
91
125
 
92
126
  TimeSoup controls how events are distributed across time. Out of the box, it produces realistic day-of-week and hour-of-day patterns derived from real Mixpanel data (weekday-heavy, Saturday valley, morning peak).
@@ -0,0 +1,291 @@
1
+ /**
2
+ * ═══════════════════════════════════════════════════════════════
3
+ * DATASET OVERVIEW
4
+ * ═══════════════════════════════════════════════════════════════
5
+ *
6
+ * IC3 Capstone — e-commerce dataset for Mixpanel certification.
7
+ * - 25,000 users over 180 days, ~5M events
8
+ * - Events: checkout (array-of-object cart), add to cart, view/save items
9
+ * - Nested product arrays (no product_id is emitted); lookup table keyed on luckyNumber
10
+ * - Location, browser, session tracking enabled
11
+ *
12
+ * ═══════════════════════════════════════════════════════════════
13
+ * ANALYTICS HOOKS (1 pattern)
14
+ * ═══════════════════════════════════════════════════════════════
15
+ *
16
+ * 1. EVENT DUPLICATION (everything hook)
17
+ * Every user gets ~3-7 random events duplicated, simulating
18
+ * real-world duplicate event ingestion that students must
19
+ * identify and handle in their analysis.
20
+ */
21
+
22
+ import Chance from 'chance';
23
+ let chance = new Chance();
24
+ import dayjs from "dayjs";
25
+ import utc from "dayjs/plugin/utc.js";
26
+ dayjs.extend(utc);
27
+ import { uid, comma } from 'ak-tools';
28
+ import { pickAWinner, weighNumRange, date, integer, weighChoices } from "../brain/utils/utils.js";
29
+
30
// Candidate values for the `videoCategory` prop attached to the "Video Likes" and "Video Dislikes" funnels.
const videoCategories = ["funny", "educational", "inspirational", "music", "news", "sports", "cooking", "DIY", "travel", "gaming"];
// Candidate values for the `spiritAnimal` user property.
const spiritAnimals = ["duck", "dog", "otter", "penguin", "cat", "elephant", "lion", "cheetah", "giraffe", "zebra", "rhino", "hippo", "whale", "dolphin", "shark", "octopus", "squid", "jellyfish", "starfish", "seahorse", "crab", "lobster", "shrimp", "clam", "snail", "slug", "butterfly", "moth", "bee", "wasp", "ant", "beetle", "ladybug", "caterpillar", "centipede", "millipede", "scorpion", "spider", "tarantula", "tick", "mite", "mosquito", "fly", "dragonfly", "damselfly", "grasshopper", "cricket", "locust", "mantis", "cockroach", "termite", "praying mantis", "walking stick", "stick bug", "leaf insect", "lacewing", "aphid", "cicada", "thrips", "psyllid", "scale insect", "whitefly", "mealybug", "planthopper", "leafhopper", "treehopper", "flea", "louse", "bedbug", "flea beetle", "weevil", "longhorn beetle", "leaf beetle", "tiger beetle", "ground beetle", "lady beetle", "firefly", "click beetle", "rove beetle", "scarab beetle", "dung beetle", "stag beetle", "rhinoceros beetle", "hercules beetle", "goliath beetle", "jewel beetle", "tortoise beetle"];
32
+
33
/**
 * Dungeon definition for the "ic3-capstone" dataset: 25,000 users and
 * 5,000,000 events over 180 days, written to disk as JSON.
 *
 * The only custom behavior lives in `hook`, which appends duplicate events
 * during the "everything" pass; every other key is declarative configuration.
 *
 * @type {import('../types.js').Dungeon}
 */
const config = {
  token: "",
  seed: "IC3 baby!",
  name: "ic3-capstone",
  numDays: 180, // how many days' worth of data
  numEvents: 5_000_000, // how many events
  numUsers: 25_000, // how many users
  format: 'json', // csv or json
  region: "US",
  hasAnonIds: true, // if true, anonymousIds are created for each user
  hasSessionIds: true, // if true, session ids are created for each user
  hasAdSpend: false,
  makeChart: false,
  hasLocation: true,
  hasAndroidDevices: false,
  hasIOSDevices: false,
  hasDesktopDevices: true,
  hasBrowser: true,
  hasCampaigns: false,
  isAnonymous: false,
  alsoInferFunnels: false,
  concurrency: 1,
  batchSize: 250_000,
  writeToDisk: true,
  events: [
    {
      event: "checkout",
      weight: 2,
      properties: {
        currency: pickAWinner(["USD", "CAD", "EUR", "BTC", "ETH", "JPY"], 0),
        coupon: weighChoices(["none", "none", "none", "none", "10%OFF", "20%OFF", "10%OFF", "20%OFF", "30%OFF", "40%OFF", "50%OFF"]),
        // array-of-objects cart with 1..5 line items (makeProducts default)
        cart: makeProducts()
      }
    },
    {
      event: "add to cart",
      weight: 4,
      properties: {
        item: makeProducts(1),
      }
    },
    {
      event: "view item",
      weight: 8,
      properties: {
        item: makeProducts(1)
      }
    },
    {
      event: "save item",
      weight: 5,
      properties: {
        item: makeProducts(1),
      }
    },
    {
      event: "page view",
      weight: 10,
      properties: {
        page: ["/", "/help", "/account", "/watch", "/listen", "/product", "/people", "/peace"],
      }
    },
    {
      event: "watch video",
      weight: 8,
      properties: {
        watchTimeSec: weighNumRange(10, 600, .25),
      }
    },
    {
      event: "like video",
      weight: 6,
      properties: {

      }
    },
    {
      event: "dislike video",
      weight: 4,
      properties: {

      }
    },
    {
      event: "sign up",
      weight: 1,
      isFirstEvent: true,
      properties: {
        signupMethod: pickAWinner(["email", "google", "facebook", "twitter", "linkedin", "github"]),
        referral: weighChoices(["none", "none", "none", "friend", "ad", "ad", "ad", "friend", "friend", "friend", "friend"]),
      }
    },

  ],
  funnels: [
    {
      sequence: ["page view", "view item", "save item", "page view", "sign up"],
      conversionRate: 50,
      order: "first-and-last-fixed",
      weight: 1,
      isFirstFunnel: true,
      timeToConvert: 2,
      experiment: false,
      name: "Signup Flow"

    },
    {
      sequence: ["watch video", "like video", "watch video", "like video"],
      name: "Video Likes",
      conversionRate: 60,
      props: {
        videoCategory: videoCategories,
        quality: ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"],
        format: ["mp4", "avi", "mov", "mpg"],
        uploader_id: chance.guid.bind(chance)
      }
    },
    {
      name: "Video Dislikes",
      sequence: ["watch video", "dislike video", "watch video", "dislike video"],
      conversionRate: 20,
      props: {
        videoCategory: videoCategories,
        quality: ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"],
        format: ["mp4", "avi", "mov", "mpg"],
        uploader_id: chance.guid.bind(chance)
      }
    },
    {
      name: "eCommerce Purchase",
      sequence: ["view item", "view item", "add to cart", "view item", "add to cart", "checkout"],
      conversionRate: 15,
      requireRepeats: true,
      weight: 10,
      order: "last-fixed",
    }

  ],
  superProps: {
    theme: pickAWinner(["light", "dark", "custom", "light", "dark"]),
  },
  /*
  user properties work the same as event properties
  each key should be an array or function reference
  */
  userProps: {
    title: chance.profession.bind(chance),
    luckyNumber: weighNumRange(1, 500, .3),
    spiritAnimal: spiritAnimals
  },
  scdProps: {},
  mirrorProps: {},
  /*
  for group analytics keys, we need an array of arrays [[],[],[]]
  each pair represents a group_key and the number of profiles for that key
  */
  groupKeys: [],
  groupProps: {},
  lookupTables: [{
    // keyed on the same property name as userProps.luckyNumber
    key: "luckyNumber",
    entries: 500,
    attributes: {
      present: ["happy", "sad", "angry", "confused", "excited", "bored", "curious", "anxious", "relaxed", "stressed"],
      past: ["joyful", "melancholy", "furious", "puzzled", "thrilled", "weary", "inquiring", "nervous", "calm", "tense"],
      future: ["hopeful", "dreadful", "optimistic", "skeptical", "eager", "lethargic", "inquiring", "apprehensive", "confident", "uneasy"],
      weather: ["sunny", "rainy", "cloudy", "stormy", "snowy", "windy", "foggy", "humid", "dry", "chilly"],
      temperature: weighNumRange(-10, 100)

    }

  }],
  /**
   * Generation hook. Only the "everything" pass is modified; every other
   * `type` returns `record` untouched.
   *
   * For "everything", `record` is treated as an array (presumably all events
   * of a single user — confirm against the generator). Between 3 and 7
   * distinct entries are picked at random and shallow copies of them are
   * appended to the same array, so the dataset contains duplicate events.
   *
   * @param {*} record - the item being generated; mutated in place for "everything"
   * @param {string} type - which generation pass invoked the hook
   * @param {*} meta - unused here; kept so the signature matches the caller
   * @returns {*} the same `record` reference
   */
  hook: function (record, type, meta) {
    // The array guard avoids looping on an undefined length if a non-array
    // record ever reaches this branch.
    if (type === "everything" && Array.isArray(record)) {
      // every user has ~5 duplicate events
      let numDupes = integer(3, 7);
      // short histories: duplicate fewer events than the array holds
      if (record.length < numDupes) numDupes = record.length - 1;

      // pick distinct random positions; a Set ignores repeated draws
      const dupeIndexes = new Set();
      while (dupeIndexes.size < numDupes) {
        dupeIndexes.add(integer(0, record.length - 1));
      }

      // append shallow copies in the order they were drawn
      for (const idx of dupeIndexes) {
        record.push({ ...record[idx] });
      }
    }

    return record;
  }
};
242
+
243
/**
 * Creates a property generator that produces a list of random product line items.
 *
 * Each generated item has: `amount` (1-100), `quantity` (1-5), `total_value`
 * (amount * quantity), `featured`, `category`, `descriptor`, `slug`,
 * `assetPreview` (an example.com URL) and `assetType` (the file extension).
 *
 * @param {number} [maxItems=5] - upper bound for the number of items per call (lower bound is 1)
 * @returns {function(): function(): Array<Array<Object>>} a function that, when
 *   called, builds the items and returns another function yielding `[items]`.
 *   NOTE(review): the extra function and the wrapping array presumably keep the
 *   generator from picking a single element out of the list — confirm against
 *   the property resolver.
 */
function makeProducts(maxItems = 5) {
  // Vocabulary is constant, so build it once per generator rather than once per event.
  const categories = ["electronics", "books", "clothing", "home", "garden", "toys", "sports", "automotive", "beauty", "health", "grocery", "jewelry", "shoes", "tools", "office supplies"];
  const descriptors = ["brand new", "open box", "refurbished", "used", "like new", "vintage", "antique", "collectible"];
  const suffixes = ["item", "product", "good", "merchandise", "thing", "object", "widget", "gadget", "device", "apparatus", "contraption", "instrument", "tool", "implement", "utensil", "appliance", "machine", "equipment", "gear", "kit", "set", "package"];
  const assetExtensions = ['.png', '.jpg', '.jpeg', '.heic', '.mp4', '.mov', '.avi'];

  // "like new" -> "like-new"
  const toSlugPart = (text) => text.replace(/\s+/g, '-').toLowerCase();

  return function () {
    const data = [];
    const numOfItems = integer(1, maxItems);

    for (let i = 0; i < numOfItems; i++) {
      // The order of the random draws below is kept stable on purpose.
      const category = chance.pickone(categories);
      const descriptor = chance.pickone(descriptors);
      const suffixWord = chance.pickone(suffixes);
      const asset = chance.pickone(assetExtensions);
      const slug = `${toSlugPart(descriptor)}-${toSlugPart(suffixWord)}`;

      const price = integer(1, 100);
      const quantity = integer(1, 5);

      data.push({
        amount: price,
        quantity,
        total_value: price * quantity,
        featured: chance.pickone([true, false, false]),
        category,
        descriptor,
        slug,
        assetPreview: `https://example.com/assets/${slug}${asset}`,
        assetType: asset,
      });
    }

    return () => [data];
  };
}
284
+
285
+
286
/**
 * Weighted coin toss backed by the module's `chance` instance.
 *
 * @param {number} [likelihood=50] - forwarded to `chance.bool` as its `likelihood` option
 * @returns {boolean} whatever `chance.bool` yields for that likelihood
 */
function flip(likelihood = 50) {
  const options = { likelihood };
  return chance.bool(options);
}
289
+
290
+
291
+ export default config;