make-mp-data 1.5.56 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/.claude/settings.local.json +21 -0
  2. package/.gcloudignore +2 -1
  3. package/.vscode/launch.json +6 -17
  4. package/.vscode/settings.json +31 -2
  5. package/dungeons/media.js +371 -0
  6. package/index.js +353 -1766
  7. package/{components → lib/cli}/cli.js +25 -6
  8. package/lib/cloud-function.js +20 -0
  9. package/lib/core/config-validator.js +248 -0
  10. package/lib/core/context.js +180 -0
  11. package/lib/core/storage.js +268 -0
  12. package/{components → lib/data}/defaults.js +17 -14
  13. package/lib/generators/adspend.js +133 -0
  14. package/lib/generators/events.js +242 -0
  15. package/lib/generators/funnels.js +330 -0
  16. package/lib/generators/mirror.js +168 -0
  17. package/lib/generators/profiles.js +93 -0
  18. package/lib/generators/scd.js +102 -0
  19. package/lib/orchestrators/mixpanel-sender.js +222 -0
  20. package/lib/orchestrators/user-loop.js +194 -0
  21. package/lib/orchestrators/worker-manager.js +200 -0
  22. package/{components → lib/utils}/ai.js +8 -36
  23. package/{components → lib/utils}/chart.js +9 -9
  24. package/{components → lib/utils}/project.js +4 -4
  25. package/{components → lib/utils}/utils.js +35 -23
  26. package/package.json +15 -15
  27. package/scripts/dana.mjs +137 -0
  28. package/scripts/new-dungeon.sh +7 -6
  29. package/scripts/update-deps.sh +2 -1
  30. package/tests/cli.test.js +28 -25
  31. package/tests/e2e.test.js +38 -36
  32. package/tests/int.test.js +151 -56
  33. package/tests/testSoup.mjs +1 -1
  34. package/tests/unit.test.js +15 -14
  35. package/tsconfig.json +1 -1
  36. package/types.d.ts +68 -11
  37. package/vitest.config.js +47 -0
  38. package/log.json +0 -1678
  39. package/tests/jest.config.js +0 -47
  40. /package/{components → lib/utils}/prompt.txt +0 -0
@@ -0,0 +1,268 @@
1
+ /**
2
+ * Storage module providing HookArray functionality for data transformation and batching
3
+ * Extracted from index.js to eliminate global dependencies
4
+ */
5
+
6
+ /** @typedef {import('../../types').Context} Context */
7
+
8
+ import { existsSync } from "fs";
9
+ import pLimit from 'p-limit';
10
+ import os from "os";
11
+ import path from "path";
12
+ import * as u from "../utils/utils.js";
13
+
14
/**
 * Creates a hooked array that transforms data on push and handles batching/disk writes.
 *
 * The array passed in is enhanced in place and returned with four extra methods:
 *  - `hookPush(item, meta)` runs `hook` on the item (or on each element when the item
 *    is an array), appends the result(s), and writes the buffer to disk once its
 *    length exceeds the batch size.
 *  - `flush()` writes whatever is currently buffered.
 *  - `getWritePath()` returns the path the next write will target.
 *  - `getWriteDir()` returns the non-batched target path (despite its name it
 *    includes the file name, as it always has).
 *
 * Write location: `./data` when that folder exists, otherwise the current working
 * directory; the OS temp dir when NODE_ENV starts with "prod"; and finally
 * `config.writeToDisk` when it is a `gs://` string, which overrides the others.
 *
 * Any option not listed below is copied onto the returned array and is also passed
 * to the hook as part of its metadata argument.
 *
 * @param {Array} arr - Base array to enhance
 * @param {Object} opts - Configuration options
 * @param {Function} opts.hook - Transform function applied to each item
 * @param {string} opts.type - Type identifier for the hook function
 * @param {string} opts.filepath - Base filename for disk writes
 * @param {string} opts.format - Output format ('csv' or 'json')
 * @param {number} opts.concurrency - Max concurrent file operations
 * @param {Context} opts.context - Context object with config, batchSize, etc.
 * @returns {Promise<Array>} Enhanced array with hookPush and flush methods
 */
export async function createHookArray(arr = [], opts = {}) {
	const {
		hook = a => a,
		type = "",
		filepath = "./defaultFile",
		format = "csv",
		concurrency = 1,
		context = {},
		...rest
	} = opts;

	const FILE_CONN = pLimit(concurrency);
	const { config = {}, runtime = {} } = context;
	const BATCH_SIZE = config.batchSize || 1_000_000;
	const NODE_ENV = process.env.NODE_ENV || "unknown";

	let batch = 0;
	let writeDir;
	let isBatchMode = runtime.isBatchMode || false;

	// Determine write directory
	const dataFolder = path.resolve("./data");
	if (existsSync(dataFolder)) writeDir = dataFolder;
	else writeDir = path.resolve("./");

	if (NODE_ENV?.toLowerCase()?.startsWith("prod")) {
		writeDir = path.resolve(os.tmpdir());
	}

	if (typeof config.writeToDisk === "string" && config.writeToDisk.startsWith('gs://')) {
		writeDir = config.writeToDisk;
	}

	/**
	 * Joins a file name onto the write directory.
	 * Bucket URLs are concatenated by hand because path.join() normalizes the
	 * double slash in "gs://" down to a single one.
	 */
	function resolveTarget(fileName) {
		if (writeDir?.startsWith('gs://')) return `${writeDir}/${fileName}`;
		return path.join(writeDir, fileName);
	}

	function getWritePath() {
		const partSuffix = isBatchMode ? `-part-${batch.toString()}` : "";
		return resolveTarget(`${filepath}${partSuffix}.${format}`);
	}

	function getWriteDir() {
		return resolveTarget(`${filepath}.${format}`);
	}

	/**
	 * Runs the hook on one record and appends whatever it returns.
	 * A failing hook is logged and the untransformed record is kept (best effort).
	 */
	async function applyHook(record, metaData) {
		try {
			const enriched = await hook(record, type, metaData);
			if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
			else arr.push(enriched);
		} catch (e) {
			console.error(`\n\nyour hook had an error\n\n`, e);
			arr.push(record);
		}
	}

	async function transformThenPush(item, meta) {
		if (item === null || item === undefined) return false;
		if (typeof item === 'object' && Object.keys(item).length === 0) return false;

		const allMetaData = { ...rest, ...meta };

		if (Array.isArray(item)) {
			// sequential on purpose: output order follows input order
			for (const i of item) {
				await applyHook(i, allMetaData);
			}
		} else {
			await applyHook(item, allMetaData);
		}

		if (arr.length <= BATCH_SIZE) return false;

		// Buffer is over the limit: switch to part files and write this batch out
		isBatchMode = true;
		runtime.isBatchMode = true; // Update runtime state
		batch++;
		const writePath = getWritePath();
		return FILE_CONN(() => writeToDisk(arr, { writePath }));
	}

	async function writeToDisk(data, options) {
		const { writePath } = options;
		let writeResult;

		if (config.verbose) {
			console.log(`\n\n\twriting ${writePath}\n\n`);
		}

		switch (format) {
			case "csv":
				writeResult = await u.streamCSV(writePath, data);
				break;
			case "json":
				writeResult = await u.streamJSON(writePath, data);
				break;
			default:
				throw new Error(`format ${format} is not supported`);
		}

		// free up memory for batch mode; outside batch mode the data stays in the array
		if (isBatchMode) data.length = 0;
		return writeResult;
	}

	async function flush() {
		if (arr.length === 0) return;
		batch++;
		const writePath = getWritePath();
		// writeToDisk already empties the array when in batch mode
		await FILE_CONN(() => writeToDisk(arr, { writePath }));
	}

	// Enhance the array with our methods
	const enrichedArray = arr;
	enrichedArray.hookPush = transformThenPush;
	enrichedArray.flush = flush;
	enrichedArray.getWriteDir = getWriteDir;
	enrichedArray.getWritePath = getWritePath;

	// Add additional properties from rest
	for (const [key, value] of Object.entries(rest)) {
		enrichedArray[key] = value;
	}

	return enrichedArray;
}
160
+
161
/**
 * Storage manager class for initializing and managing all storage containers
 */
export class StorageManager {
	/**
	 * @param {Context} context - Context object; its `config` is read when containers are built
	 */
	constructor(context) {
		this.context = context;
	}

	/**
	 * Initialize all storage containers for the data generation process.
	 *
	 * Every container is built with createHookArray using the same hook, format,
	 * concurrency and context; only the type and file name differ. File names are
	 * `<simulationName or fallback>-<SUFFIX>`.
	 *
	 * @returns {Promise<import('../../types').Storage>} Storage containers object
	 */
	async initializeContainers() {
		const { config } = this.context;

		// Single place where the shared createHookArray options are assembled
		const makeContainer = (type, fallbackName, suffix) => createHookArray([], {
			hook: config.hook,
			type,
			filepath: `${config.simulationName || fallbackName}-${suffix}`,
			format: config.format || "csv",
			concurrency: config.concurrency || 1,
			context: this.context
		});

		// Created in the same order as before; key order of the result is unchanged
		const eventData = await makeContainer("event", "events", "EVENTS");
		const userProfilesData = await makeContainer("user", "users", "USERS");
		const adSpendData = await makeContainer("adspend", "adspend", "ADSPEND");
		const mirrorEventData = await makeContainer("mirror", "mirror", "MIRROR");

		const storage = {
			eventData,
			userProfilesData,
			adSpendData,
			scdTableData: [],
			groupProfilesData: [],
			lookupTableData: [],
			mirrorEventData
		};

		// Initialize SCD tables if configured (one container per scdProps key)
		for (const scdKey of Object.keys(config.scdProps || {})) {
			const scdArray = await makeContainer("scd", "scd", `${scdKey}-SCD`);
			scdArray.scdKey = scdKey;
			storage.scdTableData.push(scdArray);
		}

		// Initialize group profile tables if configured; only the first element
		// of each groupKeys entry is used here
		if (config.groupKeys?.length > 0) {
			for (const [groupKey] of config.groupKeys) {
				const groupArray = await makeContainer("group", "groups", `${groupKey}-GROUPS`);
				groupArray.groupKey = groupKey;
				storage.groupProfilesData.push(groupArray);
			}
		}

		// Initialize lookup tables if configured (one container per entry, named by its `key`)
		if (config.lookupTables?.length > 0) {
			for (const lookupConfig of config.lookupTables) {
				const lookupArray = await makeContainer("lookup", "lookup", `${lookupConfig.key}-LOOKUP`);
				lookupArray.lookupKey = lookupConfig.key;
				storage.lookupTableData.push(lookupArray);
			}
		}

		return storage;
	}
}
@@ -1,8 +1,17 @@
1
1
  /* cSpell:disable */
2
+ /**
3
+ * Default data sets for generating realistic test data
4
+ * @fileoverview Contains default values for campaigns, devices, locations, and domains
5
+ */
6
+
7
+ /** @typedef {import('../../types.d.ts').main.Dungeon} Config */
8
+ /** @typedef {import('../../types.d.ts').main.ValueValid} ValueValid */
9
+
2
10
  //? https://docs.mixpanel.com/docs/data-structure/property-reference#default-properties
3
11
 
4
12
  const domainSuffix = ["com", "com", "com", "com", "net", "org", "net", "org", "io", "co", "co.uk", "us", "biz", "info", "gov", "edu"];
5
13
  const domainPrefix = ["gmail", "gmail", "gmail", "gmail", "gmail", "gmail", "yahoo", "yahoo", "icloud", "icloud", "icloud", "icloud", "hotmail", "hotmail", "gmail", "gmail", "gmail", "gmail", "gmail", "gmail", "yahoo", "yahoo", "icloud", "icloud", "icloud", "hotmail", "hotmail", "outlook", "aol", "outlook", "aol", "protonmail", "zoho", "gmx", "yandex", "mail", "inbox", "fastmail", "tutanota", "mailfence", "disroot", "riseup", "posteo", "runbox", "kolabnow", "mailbox", "scryptmail", "ctemplar", "countermail", "hushmail", "startmail", "privatemail"];
14
+
6
15
  const campaigns = [
7
16
  {
8
17
  utm_campaign: ["$organic"],
@@ -25,7 +34,6 @@ const campaigns = [
25
34
  utm_content: ["sc_control_group", "sc_variant_A", "sc_variant_B", "sc_variant_C", "sc_variant_D"],
26
35
  utm_term: ["sc_jan_feb", "sc_mar_apr", "sc_may_jun", "sc_jul_aug", "sc_sep_oct", "sc_nov_dec"]
27
36
  },
28
-
29
37
  {
30
38
  utm_source: ["linkedin"],
31
39
  utm_campaign: ["li_free_trial", "li_discount_US", "li_fall_sale", "li_holiday_special", "li_lookalike_audience"],
@@ -315,7 +323,6 @@ const androidDevices = [
315
323
  }
316
324
  ];
317
325
 
318
-
319
326
  const desktopDevices = [
320
327
  {
321
328
  model: 'iMac 24-inch (M1, 2021)',
@@ -446,7 +453,6 @@ const desktopDevices = [
446
453
  }
447
454
  ];
448
455
 
449
-
450
456
  const locations = [
451
457
  {
452
458
  "country": "United States",
@@ -1242,19 +1248,16 @@ const locations = [
1242
1248
  }
1243
1249
  ];
1244
1250
 
1245
-
1246
-
1247
-
1248
-
1249
- module.exports = {
1251
+ export {
1250
1252
  campaigns,
1251
- devices: {
1252
- browsers,
1253
- androidDevices,
1254
- iosDevices,
1255
- desktopDevices
1256
- },
1257
1253
  locations,
1258
1254
  domainSuffix,
1259
1255
  domainPrefix
1260
1256
  };
1257
+
1258
+ export const devices = {
1259
+ browsers,
1260
+ androidDevices,
1261
+ iosDevices,
1262
+ desktopDevices
1263
+ };
@@ -0,0 +1,133 @@
1
+ /**
2
+ * Ad Spend generator module
3
+ * Creates realistic advertising spend events with UTM parameters and metrics
4
+ */
5
+
6
+ /** @typedef {import('../../types').Context} Context */
7
+
8
+ import dayjs from "dayjs";
9
+ import { md5 } from "ak-tools";
10
+ import * as u from "../utils/utils.js";
11
+
12
/**
 * Builds one ad spend event per paid campaign for the given day.
 *
 * Walks every network in the campaign list and every name in its `utm_campaign`
 * list; the "$organic" campaign is skipped. The operation counter on the context
 * is incremented once per call, before anything else happens.
 *
 * @param {Context} context - Context object; provides incrementOperations() and the default campaigns
 * @param {string} day - ISO date string for the ad spend day
 * @param {Array} campaigns - Campaign configurations (optional, falls back to context.campaigns)
 * @returns {Promise<Array>} Ad spend event objects, empty when there are no campaigns
 */
export async function makeAdSpend(context, day, campaigns = null) {
	context.incrementOperations();

	const networks = campaigns || context.campaigns;
	const hasNetworks = Boolean(networks) && networks.length !== 0;
	if (!hasNetworks) return [];

	// one generator instance is shared by every event created in this call
	const rng = u.getChance();
	const events = [];

	for (const net of networks) {
		for (const campaignName of net.utm_campaign) {
			// organic traffic has no spend, so it produces no event
			if (campaignName !== "$organic") {
				events.push(createAdSpendEvent(net, campaignName, day, rng));
			}
		}
	}

	return events;
}
48
+
49
/**
 * Builds a single "$ad_spend" event for one campaign of one network.
 *
 * Cost, average CPC and average CTR are drawn from the supplied generator;
 * clicks, impressions and views are derived from them with integer flooring.
 * The first entry of `network.utm_source` names the network and, joined with
 * the campaign name, forms the campaign id whose md5 becomes the insert id.
 *
 * @param {Object} network - Network configuration object (utm_source, utm_medium, utm_content, utm_term)
 * @param {string} campaign - Campaign name
 * @param {string} day - ISO date string
 * @param {Object} chance - Chance.js instance
 * @returns {Object} Ad spend event object
 */
function createAdSpendEvent(network, campaign, day, chance) {
	// random draws stay in this order so a seeded generator yields the same sequence
	const cost = chance.floating({ min: 10, max: 250, fixed: 2 });
	const costPerClick = chance.floating({ min: 0.33, max: 2.00, fixed: 4 });
	const clickThroughRate = chance.floating({ min: 0.05, max: 0.25, fixed: 4 });

	const clicks = Math.floor(cost / costPerClick);
	const impressions = Math.floor(clicks / clickThroughRate);
	const views = Math.floor(impressions * clickThroughRate);

	// one value per UTM dimension, selected through the shared utils helpers
	const pickFrom = (values) => u.choose(u.pickAWinner(values)());
	const utm_medium = pickFrom(network.utm_medium);
	const utm_content = pickFrom(network.utm_content);
	const utm_term = pickFrom(network.utm_term);

	const source = network.utm_source[0];
	const campaignId = `${source}-${campaign}`;
	const networkLabel = source.toUpperCase();

	return {
		event: "$ad_spend",
		time: day,
		source: 'dm4',
		utm_campaign: campaign,
		campaign_id: campaignId,
		insert_id: md5(campaignId),
		network: networkLabel,
		distinct_id: networkLabel,
		utm_source: source,
		utm_medium,
		utm_content,
		utm_term,
		clicks,
		views,
		impressions,
		cost,
		date: dayjs(day).format("YYYY-MM-DD"),
	};
}
99
+
100
/**
 * Validates campaign configuration.
 *
 * Each entry must carry all five UTM lists as arrays; they are checked in the
 * order utm_source, utm_campaign, utm_medium, utm_content, utm_term and the
 * first missing one is reported. A null or undefined entry is reported the same
 * way instead of crashing on property access.
 *
 * @param {Array} campaigns - Campaign configurations to validate
 * @returns {boolean} True if valid, throws error if invalid
 * @throws {Error} When `campaigns` is not an array or an entry lacks a required array
 */
export function validateCampaigns(campaigns) {
	if (!Array.isArray(campaigns)) {
		throw new Error("Campaigns must be an array");
	}

	const requiredFields = ["utm_source", "utm_campaign", "utm_medium", "utm_content", "utm_term"];

	for (const network of campaigns) {
		for (const field of requiredFields) {
			// optional chaining lets null/undefined entries fail with the descriptive message
			if (!Array.isArray(network?.[field])) {
				throw new Error(`Each campaign network must have ${field} array`);
			}
		}
	}

	return true;
}