make-mp-data 1.5.55 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/.claude/settings.local.json +20 -0
  2. package/.gcloudignore +2 -1
  3. package/.vscode/launch.json +6 -3
  4. package/.vscode/settings.json +31 -2
  5. package/dungeons/media.js +371 -0
  6. package/index.js +354 -1757
  7. package/{components → lib/cli}/cli.js +21 -6
  8. package/lib/cloud-function.js +20 -0
  9. package/lib/core/config-validator.js +248 -0
  10. package/lib/core/context.js +180 -0
  11. package/lib/core/storage.js +268 -0
  12. package/{components → lib/data}/defaults.js +17 -14
  13. package/lib/generators/adspend.js +133 -0
  14. package/lib/generators/events.js +242 -0
  15. package/lib/generators/funnels.js +330 -0
  16. package/lib/generators/mirror.js +168 -0
  17. package/lib/generators/profiles.js +93 -0
  18. package/lib/generators/scd.js +102 -0
  19. package/lib/orchestrators/mixpanel-sender.js +222 -0
  20. package/lib/orchestrators/user-loop.js +194 -0
  21. package/lib/orchestrators/worker-manager.js +200 -0
  22. package/{components → lib/utils}/ai.js +8 -36
  23. package/{components → lib/utils}/chart.js +9 -9
  24. package/{components → lib/utils}/project.js +4 -4
  25. package/{components → lib/utils}/utils.js +35 -23
  26. package/package.json +19 -12
  27. package/scripts/dana.mjs +137 -0
  28. package/scripts/new-dungeon.sh +7 -6
  29. package/scripts/update-deps.sh +2 -1
  30. package/tests/cli.test.js +28 -25
  31. package/tests/e2e.test.js +38 -36
  32. package/tests/int.test.js +151 -56
  33. package/tests/testSoup.mjs +1 -1
  34. package/tests/unit.test.js +15 -14
  35. package/tsconfig.json +1 -1
  36. package/types.d.ts +76 -18
  37. package/vitest.config.js +47 -0
  38. package/dungeons/adspend.js +0 -96
  39. package/dungeons/anon.js +0 -104
  40. package/dungeons/big.js +0 -224
  41. package/dungeons/business.js +0 -327
  42. package/dungeons/complex.js +0 -396
  43. package/dungeons/foobar.js +0 -241
  44. package/dungeons/funnels.js +0 -220
  45. package/dungeons/gaming-experiments.js +0 -323
  46. package/dungeons/gaming.js +0 -314
  47. package/dungeons/governance.js +0 -288
  48. package/dungeons/mirror.js +0 -129
  49. package/dungeons/sanity.js +0 -118
  50. package/dungeons/scd.js +0 -205
  51. package/dungeons/session-replay.js +0 -175
  52. package/dungeons/simple.js +0 -150
  53. package/dungeons/userAgent.js +0 -190
  54. package/log.json +0 -1067
  55. package/tests/jest.config.js +0 -47
  56. package/{components → lib/utils}/prompt.txt +0 -0
package/index.js CHANGED
@@ -1,1900 +1,497 @@
1
1
  #! /usr/bin/env node
2
2
 
3
- /*
4
- make fake mixpanel data easily!
5
- by AK
6
- ak@mixpanel.com
7
- */
8
-
9
- //todo: churn implementation
10
- //todo: regular interval events (like 'card charged')
11
- //todo: SCDs send to mixpanel
12
- //todo: decent 'new dungeon' workflow
13
- //todo: validation that funnel events exist
14
- //todo: ability to catch events not in funnels and make them random...
15
-
16
-
17
- //TIME
18
- const dayjs = require("dayjs");
19
- const utc = require("dayjs/plugin/utc");
3
+ /**
4
+ * make-mp-data: Generate realistic Mixpanel data for testing and demos
5
+ * Modular, scalable data generation with support for events, users, funnels, SCDs, and more
6
+ *
7
+ * @author AK <ak@mixpanel.com>
8
+ * @version 3.0.0
9
+ */
10
+
11
+ /** @typedef {import('./types').Dungeon} Config */
12
+ /** @typedef {import('./types').Storage} Storage */
13
+ /** @typedef {import('./types').Result} Result */
14
+ /** @typedef {import('./types').Context} Context */
15
+
16
+ // Core modules
17
+ import { createContext, updateContextWithStorage } from './lib/core/context.js';
18
+ import { validateDungeonConfig } from './lib/core/config-validator.js';
19
+ import { StorageManager } from './lib/core/storage.js';
20
+
21
+ // Orchestrators
22
+ import { userLoop } from './lib/orchestrators/user-loop.js';
23
+ import { sendToMixpanel } from './lib/orchestrators/mixpanel-sender.js';
24
+ import { handleCloudFunctionEntry } from './lib/orchestrators/worker-manager.js';
25
+
26
+ // Generators
27
+ import { makeAdSpend } from './lib/generators/adspend.js';
28
+ import { makeMirror } from './lib/generators/mirror.js';
29
+ import { makeGroupProfile, makeProfile } from './lib/generators/profiles.js';
30
+
31
+ // Utilities
32
+ import getCliParams from './lib/cli/cli.js';
33
+ import * as u from './lib/utils/utils.js';
34
+ import { generateLineChart } from './lib/utils/chart.js';
35
+
36
+ // External dependencies
37
+ import dayjs from "dayjs";
38
+ import utc from "dayjs/plugin/utc.js";
39
+ import functions from '@google-cloud/functions-framework';
40
+ import { timer, sLog } from 'ak-tools';
41
+ import fs, { existsSync } from 'fs';
42
+
43
+ // Initialize dayjs and time constants
20
44
  dayjs.extend(utc);
21
45
  const FIXED_NOW = dayjs('2024-02-02').unix();
22
46
  global.FIXED_NOW = FIXED_NOW;
23
- // ^ this creates a FIXED POINT in time; we will shift it later
24
47
  let FIXED_BEGIN = dayjs.unix(FIXED_NOW).subtract(90, 'd').unix();
25
48
  global.FIXED_BEGIN = FIXED_BEGIN;
26
- const actualNow = dayjs().add(2, "day");
27
- const timeShift = actualNow.diff(dayjs.unix(FIXED_NOW), "seconds");
28
- const daysShift = actualNow.diff(dayjs.unix(FIXED_NOW), "days");
29
-
30
- // UTILS
31
- const { existsSync, writeFileSync } = require("fs");
32
- const pLimit = require('p-limit');
33
- const os = require("os");
34
- const path = require("path");
35
- const { comma, bytesHuman, makeName, md5, clone, tracker, uid, timer, ls, rm, touch, load, sLog } = require("ak-tools");
36
- const jobTimer = timer('job');
37
- const { generateLineChart } = require('./components/chart.js');
38
- const { version } = require('./package.json');
39
- const mp = require("mixpanel-import");
40
- const u = require("./components/utils.js");
41
- const getCliParams = require("./components/cli.js");
42
- const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41", os.userInfo().username);
43
- const t = require('ak-tools');
44
-
45
-
46
- //CLOUD
47
- const functions = require('@google-cloud/functions-framework');
48
- const { GoogleAuth } = require('google-auth-library');
49
- const CONCURRENCY = 1_000;
50
- let RUNTIME_URL = "https://dm4-lmozz6xkha-uc.a.run.app"; // IMPORTANT: this is what allows the service to call itself
51
- // const functionName = process.env.FUNCTION_NAME || process.env.K_SERVICE;
52
-
53
- // const region = process.env.REGION; // Optionally, you can get the region too
54
- // const GCP_PROJECT = process.env.GCLOUD_PROJECT; // Project ID is also available as an environment variable
55
- // const isCloudFunction = !!process.env.FUNCTION_NAME || !!process.env.FUNCTION_TARGET;
56
- // if (isCloudFunction) {
57
- // RUNTIME_URL = `https://${region}-${GCP_PROJECT}.cloudfunctions.net/${functionName}`;
58
- // }
59
- // else {
60
- // RUNTIME_URL = `http://localhost:8080`;
61
- // }
62
-
63
-
64
-
65
- // DEFAULTS
66
- const { campaigns, devices, locations } = require('./components/defaults.js');
67
- let CAMPAIGNS;
68
- let DEFAULTS;
69
- /** @type {Storage} */
70
- let STORAGE;
71
- /** @type {Config} */
72
- let CONFIG;
73
- require('dotenv').config();
74
-
75
- const { NODE_ENV = "unknown" } = process.env;
76
-
77
-
78
-
79
-
80
- // RUN STATE
81
- let VERBOSE = false;
82
- let isCLI = false;
83
- // if we are running in batch mode, we MUST write to disk before we can send to mixpanel
84
- let isBATCH_MODE = false;
85
- let BATCH_SIZE = 1_000_000;
86
49
 
87
- //todo: these should be moved into the hookedArrays
88
- let operations = 0;
89
- let eventCount = 0;
90
- let userCount = 0;
50
+ // Package version
51
+ const { version } = JSON.parse(fs.readFileSync('./package.json', 'utf8'));
91
52
 
53
+ // Environment
54
+ const { NODE_ENV = "unknown" } = process.env;
55
+ const isCLI = process.argv[1].endsWith('index.js') || process.argv[1].endsWith('cli.js');
92
56
 
93
57
  /**
94
- * generates fake mixpanel data
95
- * @param {Config} config
58
+ * Main data generation function
59
+ * @param {Config} config - Configuration object for data generation
60
+ * @returns {Promise<Result>} Generated data and metadata
96
61
  */
97
62
  async function main(config) {
63
+ const jobTimer = timer('job');
98
64
  jobTimer.start();
99
- const seedWord = process.env.SEED || config.seed || "hello friend!";
100
- config.seed = seedWord;
101
- const chance = u.initChance(seedWord);
102
- //seed the random number generator, get it with getChance()
103
- // ^ this is critical; same seed = same data;
104
- // ^ seed can be passed in as an env var or in the config
105
- validateDungeonConfig(config);
106
- global.FIXED_BEGIN = dayjs.unix(FIXED_NOW).subtract(config.numDays, 'd').unix();
107
-
108
- //GLOBALS
109
- CONFIG = config;
110
- VERBOSE = config.verbose;
111
- CAMPAIGNS = campaigns;
112
- DEFAULTS = {
113
- locationsUsers: u.pickAWinner(clone(locations).map(l => { delete l.country; return l; }), 0),
114
- locationsEvents: u.pickAWinner(clone(locations).map(l => { delete l.country_code; return l; }), 0),
115
- iOSDevices: u.pickAWinner(devices.iosDevices, 0),
116
- androidDevices: u.pickAWinner(devices.androidDevices, 0),
117
- desktopDevices: u.pickAWinner(devices.desktopDevices, 0),
118
- browsers: u.pickAWinner(devices.browsers, 0),
119
- campaigns: u.pickAWinner(campaigns, 0),
120
- };
121
-
122
- if (config.singleCountry) {
123
- DEFAULTS.locationsEvents = u.pickAWinner(clone(locations)
124
- .filter(l => l.country === config.singleCountry)
125
- .map(l => { delete l.country; return l; }), 0);
126
-
127
- DEFAULTS.locationsUsers = u.pickAWinner(clone(locations)
128
- .filter(l => l.country === config.singleCountry)
129
- .map(l => { delete l.country_code; return l; }), 0);
130
- }
131
-
132
-
133
- //TRACKING
134
- const runId = uid(42);
135
- const { events, superProps, userProps, scdProps, groupKeys, groupProps, lookupTables, soup, hook, mirrorProps, token: source_proj_token, ...trackingParams } = config;
136
- let { funnels } = config;
137
- trackingParams.runId = runId;
138
- trackingParams.version = version;
139
- delete trackingParams.funnels;
140
-
141
- //STORAGE
142
- const { simulationName, format } = config;
143
- const eventData = await makeHookArray([], { hook, type: "event", config, format, filepath: `${simulationName}-EVENTS` });
144
- const userProfilesData = await makeHookArray([], { hook, type: "user", config, format, filepath: `${simulationName}-USERS` });
145
- const adSpendData = await makeHookArray([], { hook, type: "ad-spend", config, format, filepath: `${simulationName}-AD-SPEND` });
146
- const groupEventData = await makeHookArray([], { hook, type: "group-event", config, format, filepath: `${simulationName}-GROUP-EVENTS` });
147
-
148
- // SCDs, Groups, + Lookups may have multiple tables
149
- const scdTableKeys = Object.keys(scdProps);
150
- const scdTableData = await Promise.all(scdTableKeys.map(async (key) =>
151
- //todo don't assume everything is a string... lol
152
- // @ts-ignore
153
- await makeHookArray([], { hook, type: "scd", config, format, scdKey: key, entityType: config.scdProps[key]?.type || "user", dataType: "string", filepath: `${simulationName}-${scdProps[key]?.type || "user"}-SCD-${key}` })
154
- ));
155
- const groupTableKeys = Object.keys(groupKeys);
156
- const groupProfilesData = await Promise.all(groupTableKeys.map(async (key, index) => {
157
- const groupKey = groupKeys[index]?.slice()?.shift();
158
- return await makeHookArray([], { hook, type: "group", config, format, groupKey, filepath: `${simulationName}-GROUPS-${groupKey}` });
159
- }));
160
-
161
- const lookupTableKeys = Object.keys(lookupTables);
162
- const lookupTableData = await Promise.all(lookupTableKeys.map(async (key, index) => {
163
- const lookupKey = lookupTables[index].key;
164
- return await makeHookArray([], { hook, type: "lookup", config, format, lookupKey: lookupKey, filepath: `${simulationName}-LOOKUP-${lookupKey}` });
165
- }));
166
-
167
- const mirrorEventData = await makeHookArray([], { hook, type: "mirror", config, format, filepath: `${simulationName}-MIRROR` });
168
-
169
- STORAGE = {
170
- eventData,
171
- userProfilesData,
172
- scdTableData,
173
- groupProfilesData,
174
- lookupTableData,
175
- mirrorEventData,
176
- adSpendData,
177
- groupEventData
178
-
179
- };
180
65
 
181
-
182
- track('start simulation', trackingParams);
183
- log(`------------------SETUP------------------`);
184
- log(`\nyour data simulation will heretofore be known as: \n\n\t${config.simulationName.toUpperCase()}...\n`);
185
- log(`and your configuration is:\n\n`, JSON.stringify(trackingParams, null, 2));
186
- log(`------------------SETUP------------------`, "\n");
187
-
188
-
189
-
190
- //USERS
191
- log(`---------------SIMULATION----------------`, "\n\n");
192
- const { concurrency = 1 } = config;
193
- await userLoop(config, STORAGE, concurrency);
194
- const { hasAdSpend, epochStart, epochEnd } = config;
195
-
196
- // AD SPEND
197
- if (hasAdSpend) {
198
- const days = u.datesBetween(epochStart, epochEnd);
199
- for (const day of days) {
200
- const dailySpendData = await makeAdSpend(day);
201
- for (const spendEvent of dailySpendData) {
202
- await adSpendData.hookPush(spendEvent);
66
+ //cli mode check for positional dungeon config
67
+ if (isCLI) {
68
+ const firstArg = config._.slice().pop()
69
+ if (firstArg?.endsWith('.js') && existsSync(firstArg)) {
70
+ if (config.verbose) {
71
+ console.log(`\n🔍 Loading dungeon config from: ${firstArg}`);
72
+ }
73
+ try {
74
+ const dungeonConfig = await import(firstArg);
75
+ config = dungeonConfig.default || dungeonConfig;
76
+ } catch (error) {
77
+ console.error(`\n❌ Error loading dungeon config from ${firstArg}: ${error.message}`);
78
+ throw error;
203
79
  }
204
80
  }
205
-
81
+
206
82
  }
207
83
 
84
+ let validatedConfig;
85
+ try {
86
+ // Step 1: Validate and enrich configuration
87
+ validatedConfig = validateDungeonConfig(config);
208
88
 
209
- log("\n");
210
-
211
- //GROUP PROFILES
212
- const groupSCDs = t.objFilter(scdProps, (scd) => scd.type !== 'user');
213
- for (const [index, groupPair] of groupKeys.entries()) {
214
- const groupKey = groupPair[0];
215
- const groupCardinality = groupPair[1];
216
- for (let i = 1; i < groupCardinality + 1; i++) {
217
- if (VERBOSE) u.progress([["groups", i]]);
218
-
219
- const props = await makeProfile(groupProps[groupKey], { created: () => { return dayjs().subtract(u.integer(0, CONFIG.numDays || 30), 'd').toISOString(); } });
220
- const group = {
221
- [groupKey]: i,
222
- ...props,
223
- };
224
- group["distinct_id"] = i.toString();
225
- await groupProfilesData[index].hookPush(group);
226
-
227
- //SCDs
228
- const thisGroupSCD = t.objFilter(groupSCDs, (scd) => scd.type === groupKey);
229
- const groupSCDKeys = Object.keys(thisGroupSCD);
230
- const groupSCD = {};
231
- for (const [index, key] of groupSCDKeys.entries()) {
232
- const { max = 100 } = groupSCDs[key];
233
- const mutations = chance.integer({ min: 2, max });
234
- const changes = await makeSCD(scdProps[key], key, i.toString(), mutations, group.created);
235
- groupSCD[key] = changes;
236
- const scdTable = scdTableData
237
- .filter(hookArr => hookArr.scdKey === key);
238
-
239
- await config.hook(changes, 'scd-pre', { profile: group, type: groupKey, scd: { [key]: groupSCDs[key] }, config, allSCDs: groupSCD });
240
- await scdTable[0].hookPush(changes, { profile: group, type: groupKey });
241
- }
89
+ // Step 2: Create context with validated config
90
+ const context = createContext(validatedConfig);
242
91
 
92
+ // Step 3: Initialize storage containers
93
+ const storageManager = new StorageManager(context);
94
+ const storage = await storageManager.initializeContainers();
95
+ updateContextWithStorage(context, storage);
243
96
 
97
+ // Step 4: Generate ad spend data (if enabled)
98
+ if (validatedConfig.hasAdSpend) {
99
+ await generateAdSpendData(context);
244
100
  }
245
- }
246
- log("\n");
247
-
248
- //GROUP EVENTS
249
- if (config.groupEvents) {
250
- for (const groupEvent of config.groupEvents) {
251
- const { frequency, group_key, attribute_to_user, group_size, ...normalEvent } = groupEvent;
252
- for (const group_num of Array.from({ length: group_size }, (_, i) => i + 1)) {
253
- const groupProfile = groupProfilesData.find(groups => groups.groupKey === group_key).find(group => group[group_key] === group_num);
254
- const { created, distinct_id } = groupProfile;
255
- normalEvent[group_key] = distinct_id;
256
- const random_user_id = chance.pick(eventData.filter(a => a.user_id)).user_id;
257
- if (!random_user_id) debugger;
258
- const deltaDays = actualNow.diff(dayjs(created), "day");
259
- const numIntervals = Math.floor(deltaDays / frequency);
260
- const eventsForThisGroup = [];
261
- for (let i = 0; i < numIntervals; i++) {
262
- const event = await makeEvent(random_user_id, null, normalEvent, [], [], {}, [], false, true);
263
- if (!attribute_to_user) delete event.user_id;
264
- event[group_key] = distinct_id;
265
- event.time = dayjs(created).add(i * frequency, "day").toISOString();
266
- delete event.distinct_id;
267
- //always skip the first event
268
- if (i !== 0) {
269
- eventsForThisGroup.push(event);
270
- }
271
- }
272
- await groupEventData.hookPush(eventsForThisGroup, { profile: groupProfile });
273
- }
274
- }
275
- }
276
101
 
102
+ // Step 5: Main user and event generation
103
+ await userLoop(context);
277
104
 
278
- //LOOKUP TABLES
279
- for (const [index, lookupTable] of lookupTables.entries()) {
280
- const { key, entries, attributes } = lookupTable;
281
- for (let i = 1; i < entries + 1; i++) {
282
- if (VERBOSE) u.progress([["lookups", i]]);
283
- const props = await makeProfile(attributes);
284
- const item = {
285
- [key]: i,
286
- ...props,
287
- };
288
- await lookupTableData[index].hookPush(item);
105
+ // Step 6: Generate group profiles (if configured)
106
+ if (validatedConfig.groupKeys && validatedConfig.groupKeys.length > 0) {
107
+ await generateGroupProfiles(context);
289
108
  }
290
109
 
291
- }
292
- log("\n");
293
-
294
-
295
- // MIRROR
296
- if (Object.keys(mirrorProps).length) await makeMirror(config, STORAGE);
297
-
298
-
299
- log("\n");
300
- log(`---------------SIMULATION----------------`, "\n");
301
-
302
- // draw charts
303
- const { makeChart = false } = config;
304
- if (makeChart) {
305
- const bornEvents = config.events?.filter((e) => e?.isFirstEvent)?.map(e => e.event) || [];
306
- const bornFunnels = config.funnels?.filter((f) => f.isFirstFunnel)?.map(f => f.sequence[0]) || [];
307
- const bornBehaviors = [...bornEvents, ...bornFunnels];
308
- const chart = await generateLineChart(eventData, bornBehaviors, makeChart);
309
- }
310
- const { writeToDisk = true, token } = config;
311
- if (!writeToDisk && !token) {
312
- jobTimer.stop(false);
313
- const { start, end, delta, human } = jobTimer.report(false);
314
- // this is awkward, but i couldn't figure out any other way to assert a type in jsdoc
315
- const i = /** @type {any} */ (STORAGE);
316
- i.time = { start, end, delta, human };
317
- const j = /** @type {Result} */ (i);
318
- return j;
319
-
320
- }
321
-
322
- log(`-----------------WRITES------------------`, `\n\n`);
323
-
324
- // write to disk and/or send to mixpanel
325
- let files;
326
- if (writeToDisk) {
327
- for (const key in STORAGE) {
328
- const table = STORAGE[key];
329
- if (table.length && typeof table.flush === "function") {
330
- await table.flush();
331
- } else {
332
- if (Array.isArray(table) && typeof table[0]?.flush === "function") {
333
- for (const subTable of table) {
334
- await subTable.flush();
335
- }
336
- }
337
- }
110
+ // Step 7: Generate group SCDs (if configured)
111
+ if (validatedConfig.scdProps && validatedConfig.groupKeys && validatedConfig.groupKeys.length > 0) {
112
+ await generateGroupSCDs(context);
338
113
  }
339
- }
340
- let importResults;
341
- if (token) importResults = await sendToMixpanel(config, STORAGE);
342
-
343
-
344
- log(`\n-----------------WRITES------------------`, "\n");
345
- track('end simulation', trackingParams);
346
- jobTimer.stop(false);
347
- const { start, end, delta, human } = jobTimer.report(false);
348
-
349
- if (process.env.NODE_ENV === 'dev') debugger;
350
- return {
351
- ...STORAGE,
352
- importResults,
353
- files,
354
- operations,
355
- eventCount,
356
- userCount,
357
- time: { start, end, delta, human },
358
- };
359
- }
360
114
 
361
- functions.http('entry', async (req, res) => {
362
- const reqTimer = timer('request');
363
- reqTimer.start();
364
- let response = {};
365
- let script = req.body || "";
366
- const params = { replicate: 1, is_replica: "false", runId: "", seed: "", ...req.query };
367
- const replicate = Number(params.replicate);
368
- // @ts-ignore
369
- if (params?.is_replica === "true") params.is_replica = true;
370
- // @ts-ignore
371
- else params.is_replica = false;
372
- const isReplica = params.is_replica;
373
- isBATCH_MODE = true;
374
- if (!params.runId) params.runId = uid(42);
375
- try {
376
- if (!script) throw new Error("no script");
377
-
378
- // Replace require("../ with require("./
379
- // script = script.replace(/require\("\.\.\//g, 'require("./');
380
- // ^ need to replace this because of the way the script is passed in... this is sketch
381
-
382
- /** @type {Config} */
383
- const config = eval(script);
384
- if (isReplica) {
385
- const newSeed = (Math.random() / Math.random() / Math.random() / Math.random() / Math.random() / Math.random()).toString();
386
- config.seed = newSeed;
387
- params.seed = newSeed;
115
+ // Step 8: Generate lookup tables (if configured)
116
+ if (validatedConfig.lookupTables && validatedConfig.lookupTables.length > 0) {
117
+ await generateLookupTables(context);
388
118
  }
389
119
 
390
- /** @type {Config} */
391
- const optionsYouCantChange = {
392
- verbose: false
393
- };
394
-
395
- if (replicate <= 1 || isReplica) {
396
- if (isReplica) sLog("DM4: worker start", params);
397
- // @ts-ignore
398
- const { files = [], operations = 0, eventCount = 0, userCount = 0 } = await main({
399
- ...config,
400
- ...optionsYouCantChange,
401
- });
402
- reqTimer.stop(false);
403
- response = { files, operations, eventCount, userCount };
120
+ // Step 9: Generate mirror datasets (if configured)
121
+ if (validatedConfig.mirrorProps && Object.keys(validatedConfig.mirrorProps).length > 0) {
122
+ await makeMirror(context);
404
123
  }
405
124
 
406
- else {
407
- sLog(`DM4: job start (${replicate} workers)`, params);
408
- const results = await spawn_file_workers(replicate, script, params);
409
- response = results;
125
+ // Step 10: Generate charts (if enabled)
126
+ if (validatedConfig.makeChart) {
127
+ await generateCharts(context);
410
128
  }
411
- }
412
- catch (e) {
413
- sLog("DM4: error", { error: e.message, stack: e.stack }, "ERROR");
414
- response = { error: e.message };
415
- res.status(500);
416
- }
417
129
 
418
- finally {
419
- reqTimer.stop(false);
420
- const { start, end, delta, human } = reqTimer.report(false);
421
- if (!isReplica) {
422
- sLog(`DM4: job end (${human})`, { human, delta, ...params, ...response });
423
- }
424
- if (isReplica) {
425
- const eps = Math.floor(((response?.eventCount || 0) / delta) * 1000);
426
- sLog(`DM4: worker end (${human})`, { human, delta, eps, ...params, ...response });
130
+ // Step 11: Flush storage containers to disk (if writeToDisk enabled)
131
+ if (validatedConfig.writeToDisk) {
132
+ await flushStorageToDisk(storage, validatedConfig);
427
133
  }
428
- response = { ...response, start, end, delta, human, ...params };
429
- res.send(response);
430
- return;
431
- }
432
- });
433
-
434
134
 
435
- /**
436
- * @typedef {import('mixpanel-import').ImportResults} ImportResults
437
- */
438
- async function spawn_file_workers(numberWorkers, payload, params) {
439
- const auth = new GoogleAuth();
440
- let client;
441
- if (RUNTIME_URL.includes('localhost')) {
442
- client = await auth.getClient();
443
- }
444
- else {
445
- client = await auth.getIdTokenClient(RUNTIME_URL);
446
- }
447
- const limit = pLimit(CONCURRENCY);
448
- const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
135
+ // Step 12: Send to Mixpanel (if token provided)
136
+ let importResults;
137
+ if (validatedConfig.token) {
138
+ importResults = await sendToMixpanel(context);
139
+ }
449
140
 
450
- const requestPromises = Array.from({ length: numberWorkers }, async (_, index) => {
451
- index = index + 1;
452
- await delay(index * 108);
453
- return limit(() => build_request(client, payload, index, params, numberWorkers));
454
- });
455
- const complete = await Promise.allSettled(requestPromises);
456
- const results = {
457
- jobs_success: complete.filter((p) => p.status === "fulfilled").length,
458
- jobs_fail: complete.filter((p) => p.status === "rejected").length
459
- };
141
+ // Step 13: Compile results
142
+ jobTimer.stop(false);
143
+ const { start, end, delta, human } = jobTimer.report(false);
460
144
 
461
- return results;
462
- }
145
+ const extractedData = extractStorageData(storage);
463
146
 
147
+ return {
148
+ ...extractedData,
149
+ importResults,
150
+ files: extractFileInfo(storage),
151
+ time: { start, end, delta, human },
152
+ operations: context.getOperations(),
153
+ eventCount: context.getEventCount(),
154
+ userCount: context.getUserCount()
155
+ };
464
156
 
465
- async function build_request(client, payload, index, params, total) {
466
- let retryAttempt = 0;
467
- sLog(`DM4: summoning worker #${index} of ${total}`, params);
468
- try {
469
- const req = await client.request({
470
- url: RUNTIME_URL + `?replicate=1&is_replica=true&runId=${params.runId || "no run id"}`,
471
- method: "POST",
472
- data: payload,
473
- headers: {
474
- "Content-Type": "text/plain",
475
- },
476
- timeout: 3600 * 1000 * 10,
477
- retryConfig: {
478
- retry: 3,
479
- onRetryAttempt: (error) => {
480
- const statusCode = error?.response?.status?.toString() || "";
481
- retryAttempt++;
482
- sLog(`DM4: summon worker ${index} retry #${retryAttempt}`, { statusCode, message: error.message, stack: error.stack, ...params }, "DEBUG");
483
- },
484
- retryDelay: 1000,
485
- shouldRetry: (error) => {
486
- if (error.code === 'ECONNRESET') return true;
487
- const statusCode = error?.response?.status;
488
- if (statusCode >= 500) return true;
489
- if (statusCode === 429) return true;
490
- }
491
- },
492
- });
493
- sLog(`DM4: worker #${index} responded`, params);
494
- const { data } = req;
495
- return data;
496
157
  } catch (error) {
497
- sLog(`DM4: worker #${index} failed to respond`, { message: error.message, stack: error.stack, code: error.code, retries: retryAttempt, ...params }, "ERROR");
498
- return {};
158
+ if (isCLI || validatedConfig.verbose) {
159
+ console.error(`\n❌ Error: ${error.message}\n`);
160
+ if (validatedConfig.verbose) {
161
+ console.error(error.stack);
162
+ }
163
+ } else {
164
+ sLog("Main execution error", { error: error.message, stack: error.stack }, "ERROR");
165
+ }
166
+ throw error;
499
167
  }
500
168
  }
501
169
 
502
-
503
-
504
- /*
505
- ------
506
- MODELS
507
- ------
508
- */
509
-
510
170
  /**
511
- * creates a mixpanel event with a flat shape
512
- * @param {string} distinct_id
513
- * @param {number} earliestTime
514
- * @param {EventConfig} chosenEvent
515
- * @param {string[]} [anonymousIds]
516
- * @param {string[]} [sessionIds]
517
- * @param {Object} [superProps]
518
- * @param {Object} [groupKeys]
519
- * @param {Boolean} [isFirstEvent]
520
- * @return {Promise<EventSchema>}
171
+ * Generate ad spend data for configured date range
172
+ * @param {Context} context - Context object
521
173
  */
522
- async function makeEvent(distinct_id, earliestTime, chosenEvent, anonymousIds, sessionIds, superProps, groupKeys, isFirstEvent, skipDefaults = false) {
523
- operations++;
524
- eventCount++;
525
- if (!distinct_id) throw new Error("no distinct_id");
526
- if (!anonymousIds) anonymousIds = [];
527
- if (!sessionIds) sessionIds = [];
528
- if (!earliestTime) throw new Error("no earliestTime");
529
- if (!chosenEvent) throw new Error("no chosenEvent");
530
- if (!superProps) superProps = {};
531
- if (!groupKeys) groupKeys = [];
532
- if (!isFirstEvent) isFirstEvent = false;
533
- const chance = u.getChance();
534
- const { mean = 0, deviation = 2, peaks = 5 } = CONFIG?.soup || {};
535
- const {
536
- hasAndroidDevices = false,
537
- hasBrowser = false,
538
- hasCampaigns = false,
539
- hasDesktopDevices = false,
540
- hasIOSDevices = false,
541
- hasLocation = false
542
- } = CONFIG || {};
543
-
544
- //event model
545
- const eventTemplate = {
546
- event: chosenEvent.event,
547
- source: "dm4",
548
- time: "",
549
- insert_id: "",
550
- };
551
-
552
- let defaultProps = {};
553
- let devicePool = [];
554
-
555
- if (hasLocation) defaultProps.location = u.shuffleArray(DEFAULTS.locationsEvents()).pop();
556
- if (hasBrowser) defaultProps.browser = u.choose(DEFAULTS.browsers());
557
- if (hasAndroidDevices) devicePool.push(DEFAULTS.androidDevices());
558
- if (hasIOSDevices) devicePool.push(DEFAULTS.iOSDevices());
559
- if (hasDesktopDevices) devicePool.push(DEFAULTS.desktopDevices());
560
-
561
- // we don't always have campaigns, because of attribution
562
- if (hasCampaigns && chance.bool({ likelihood: 25 })) defaultProps.campaigns = u.shuffleArray(DEFAULTS.campaigns()).pop();
563
- const devices = devicePool.flat();
564
- if (devices.length) defaultProps.device = u.shuffleArray(devices).pop();
565
-
174
+ async function generateAdSpendData(context) {
175
+ const { config, storage } = context;
176
+ const { numDays } = config;
566
177
 
178
+ for (let day = 0; day < numDays; day++) {
179
+ const targetDay = dayjs.unix(global.FIXED_BEGIN).add(day, 'day').toISOString();
180
+ const adSpendEvents = await makeAdSpend(context, targetDay);
567
181
 
568
-
569
-
570
-
571
- //event time
572
- if (earliestTime) {
573
- if (isFirstEvent) eventTemplate.time = dayjs.unix(earliestTime).toISOString();
574
- if (!isFirstEvent) eventTemplate.time = u.TimeSoup(earliestTime, FIXED_NOW, peaks, deviation, mean);
575
- }
576
-
577
- // anonymous and session ids
578
- if (anonymousIds.length) eventTemplate.device_id = chance.pickone(anonymousIds);
579
- if (sessionIds.length) eventTemplate.session_id = chance.pickone(sessionIds);
580
-
581
- //sometimes have a user_id
582
- if (!isFirstEvent && chance.bool({ likelihood: 42 })) eventTemplate.user_id = distinct_id;
583
-
584
- // ensure that there is a user_id or device_id
585
- if (!eventTemplate.user_id && !eventTemplate.device_id) eventTemplate.user_id = distinct_id;
586
-
587
- const props = { ...chosenEvent.properties, ...superProps };
588
-
589
- //iterate through custom properties
590
- for (const key in props) {
591
- try {
592
- eventTemplate[key] = u.choose(props[key]);
593
- } catch (e) {
594
- console.error(`error with ${key} in ${chosenEvent.event} event`, e);
595
- debugger;
596
- }
597
- }
598
-
599
- //iterate through default properties
600
- if (!skipDefaults) {
601
- for (const key in defaultProps) {
602
- if (Array.isArray(defaultProps[key])) {
603
- const choice = u.choose(defaultProps[key]);
604
- if (typeof choice === "string") {
605
- if (!eventTemplate[key]) eventTemplate[key] = choice;
606
- }
607
-
608
- else if (Array.isArray(choice)) {
609
- for (const subChoice of choice) {
610
- if (!eventTemplate[key]) eventTemplate[key] = subChoice;
611
- }
612
- }
613
-
614
- else if (typeof choice === "object") {
615
- for (const subKey in choice) {
616
- if (typeof choice[subKey] === "string") {
617
- if (!eventTemplate[subKey]) eventTemplate[subKey] = choice[subKey];
618
- }
619
- else if (Array.isArray(choice[subKey])) {
620
- const subChoice = u.choose(choice[subKey]);
621
- if (!eventTemplate[subKey]) eventTemplate[subKey] = subChoice;
622
- }
623
-
624
- else if (typeof choice[subKey] === "object") {
625
- for (const subSubKey in choice[subKey]) {
626
- if (!eventTemplate[subSubKey]) eventTemplate[subSubKey] = choice[subKey][subSubKey];
627
- }
628
- }
629
-
630
- }
631
- }
632
- }
633
- else if (typeof (defaultProps[key]) === "object") {
634
- const obj = defaultProps[key];
635
- for (const subKey in obj) {
636
- if (Array.isArray(obj[subKey])) {
637
- const subChoice = u.choose(obj[subKey]);
638
- if (Array.isArray(subChoice)) {
639
- for (const subSubChoice of subChoice) {
640
- if (!eventTemplate[subKey]) eventTemplate[subKey] = subSubChoice;
641
- }
642
- }
643
- else if (typeof subChoice === "object") {
644
- for (const subSubKey in subChoice) {
645
- if (!eventTemplate[subSubKey]) eventTemplate[subSubKey] = subChoice[subSubKey];
646
- }
647
- }
648
- else {
649
- if (!eventTemplate[subKey]) eventTemplate[subKey] = subChoice;
650
- }
651
- }
652
- else {
653
- if (!eventTemplate[subKey]) eventTemplate[subKey] = obj[subKey];
654
- }
655
- }
656
- }
657
- else {
658
- if (!eventTemplate[key]) eventTemplate[key] = defaultProps[key];
182
+ if (adSpendEvents.length > 0) {
183
+ for (const adSpendEvent of adSpendEvents) {
184
+ await storage.adSpendData.hookPush(adSpendEvent);
659
185
  }
660
186
  }
661
187
  }
662
-
663
- //iterate through groups
664
- for (const groupPair of groupKeys) {
665
- const groupKey = groupPair[0];
666
- const groupCardinality = groupPair[1];
667
- const groupEvents = groupPair[2] || [];
668
-
669
- // empty array for group events means all events
670
- if (!groupEvents.length) eventTemplate[groupKey] = u.pick(u.weighNumRange(1, groupCardinality));
671
- if (groupEvents.includes(eventTemplate.event)) eventTemplate[groupKey] = u.pick(u.weighNumRange(1, groupCardinality));
672
- }
673
-
674
- //make $insert_id
675
- eventTemplate.insert_id = md5(JSON.stringify(eventTemplate));
676
-
677
- // move time forward
678
- if (earliestTime) {
679
- const timeShifted = dayjs(eventTemplate.time).add(timeShift, "seconds").toISOString();
680
- eventTemplate.time = timeShifted;
681
- }
682
-
683
-
684
- return eventTemplate;
685
188
  }
686
189
 
687
190
  /**
688
- * takes a description of a funnel an generates successful and unsuccessful conversions
689
- * this is called MANY times per user
690
- * @param {Funnel} funnel
691
- * @param {Person} user
692
- * @param {number} firstEventTime
693
- * @param {UserProfile | Object} [profile]
694
- * @param {Record<string, SCDSchema[]> | Object} [scd]
695
- * @param {Config} [config]
696
- * @return {Promise<[EventSchema[], Boolean]>}
191
+ * Generate group profiles for all configured group keys
192
+ * @param {Context} context - Context object
697
193
  */
698
- async function makeFunnel(funnel, user, firstEventTime, profile, scd, config) {
699
- if (!funnel) throw new Error("no funnel");
700
- if (!user) throw new Error("no user");
701
- if (!profile) profile = {};
702
- if (!scd) scd = {};
194
+ async function generateGroupProfiles(context) {
195
+ const { config, storage } = context;
196
+ const { groupKeys, groupProps = {} } = config;
703
197
 
704
- const chance = u.getChance();
705
- const { hook = async (a) => a } = config;
706
- await hook(funnel, "funnel-pre", { user, profile, scd, funnel, config });
707
- let {
708
- sequence,
709
- conversionRate = 50,
710
- order = 'sequential',
711
- timeToConvert = 1,
712
- props,
713
- requireRepeats = false,
714
- } = funnel;
715
- const { distinct_id, created, anonymousIds, sessionIds } = user;
716
- const { superProps, groupKeys } = config;
717
-
718
-
719
- //choose the properties for this funnel
720
- const chosenFunnelProps = { ...props, ...superProps };
721
- for (const key in props) {
722
- try {
723
- chosenFunnelProps[key] = u.choose(chosenFunnelProps[key]);
724
- } catch (e) {
725
- console.error(`error with ${key} in ${funnel.sequence.join(" > ")} funnel`, e);
726
- debugger;
727
- }
728
- }
729
-
730
- const funnelPossibleEvents = sequence
731
- .map((eventName) => {
732
- const foundEvent = config?.events?.find((e) => e.event === eventName);
733
- /** @type {EventConfig} */
734
- const eventSpec = clone(foundEvent) || { event: eventName, properties: {} };
735
- for (const key in eventSpec.properties) {
736
- try {
737
- eventSpec.properties[key] = u.choose(eventSpec.properties[key]);
738
- } catch (e) {
739
- console.error(`error with ${key} in ${eventSpec.event} event`, e);
740
- debugger;
741
- }
742
- }
743
- delete eventSpec.isFirstEvent;
744
- delete eventSpec.weight;
745
- eventSpec.properties = { ...eventSpec.properties, ...chosenFunnelProps };
746
- return eventSpec;
747
- })
748
- .reduce((acc, step) => {
749
- if (!requireRepeats) {
750
- if (acc.find(e => e.event === step.event)) {
751
- if (chance.bool({ likelihood: 50 })) {
752
- conversionRate = Math.floor(conversionRate * 1.35); //increase conversion rate
753
- acc.push(step);
754
- }
755
- //A SKIPPED STEP!
756
- else {
757
- conversionRate = Math.floor(conversionRate * .70); //reduce conversion rate
758
- return acc; //early return to skip the step
759
- }
760
- }
761
- else {
762
- acc.push(step);
763
- }
764
- }
765
- else {
766
- acc.push(step);
767
- }
768
- return acc;
769
- }, []);
770
-
771
- if (conversionRate > 100) conversionRate = 100;
772
- if (conversionRate < 0) conversionRate = 0;
773
- let doesUserConvert = chance.bool({ likelihood: conversionRate });
774
- let numStepsUserWillTake = sequence.length;
775
- if (!doesUserConvert) numStepsUserWillTake = u.integer(1, sequence.length - 1);
776
- const funnelTotalRelativeTimeInHours = timeToConvert / numStepsUserWillTake;
777
- const msInHour = 60000 * 60;
778
- const funnelStepsUserWillTake = funnelPossibleEvents.slice(0, numStepsUserWillTake);
779
-
780
- let funnelActualOrder = [];
781
-
782
- switch (order) {
783
- case "sequential":
784
- funnelActualOrder = funnelStepsUserWillTake;
785
- break;
786
- case "random":
787
- funnelActualOrder = u.shuffleArray(funnelStepsUserWillTake);
788
- break;
789
- case "first-fixed":
790
- funnelActualOrder = u.shuffleExceptFirst(funnelStepsUserWillTake);
791
- break;
792
- case "last-fixed":
793
- funnelActualOrder = u.shuffleExceptLast(funnelStepsUserWillTake);
794
- break;
795
- case "first-and-last-fixed":
796
- funnelActualOrder = u.fixFirstAndLast(funnelStepsUserWillTake);
797
- break;
798
- case "middle-fixed":
799
- funnelActualOrder = u.shuffleOutside(funnelStepsUserWillTake);
800
- break;
801
- case "interrupted":
802
- const potentialSubstitutes = config?.events
803
- ?.filter(e => !e.isFirstEvent)
804
- ?.filter(e => !sequence.includes(e.event)) || [];
805
- funnelActualOrder = u.interruptArray(funnelStepsUserWillTake, potentialSubstitutes);
806
- break;
807
- default:
808
- funnelActualOrder = funnelStepsUserWillTake;
809
- break;
198
+ if (isCLI || config.verbose) {
199
+ console.log('\n👥 Generating group profiles...');
810
200
  }
811
201
 
202
+ for (let i = 0; i < groupKeys.length; i++) {
203
+ const [groupKey, groupCount] = groupKeys[i];
204
+ const groupContainer = storage.groupProfilesData[i];
812
205
 
206
+ if (!groupContainer) {
207
+ console.warn(`Warning: No storage container found for group key: ${groupKey}`);
208
+ continue;
209
+ }
813
210
 
814
- let lastTimeJump = 0;
815
- const funnelActualEventsWithOffset = funnelActualOrder
816
- .map((event, index) => {
817
- if (index === 0) {
818
- event.relativeTimeMs = 0;
819
- return event;
820
- }
821
-
822
- // Calculate base increment for each step
823
- const baseIncrement = (timeToConvert * msInHour) / numStepsUserWillTake;
824
-
825
- // Introduce a random fluctuation factor
826
- const fluctuation = u.integer(-baseIncrement / u.integer(3, 5), baseIncrement / u.integer(3, 5));
827
-
828
- // Ensure the time increments are increasing and add randomness
829
- const previousTime = lastTimeJump;
830
- const currentTime = previousTime + baseIncrement + fluctuation;
831
-
832
- // Assign the calculated time to the event
833
- const chosenTime = Math.max(currentTime, previousTime + 1); // Ensure non-decreasing time
834
- lastTimeJump = chosenTime;
835
- event.relativeTimeMs = chosenTime;
836
- return event;
837
- });
838
-
839
-
840
- const earliestTime = firstEventTime || dayjs(created).unix();
841
- let funnelStartTime;
842
- let finalEvents = await Promise.all(funnelActualEventsWithOffset
843
- .map(async (event, index) => {
844
- const newEvent = await makeEvent(distinct_id, earliestTime, event, anonymousIds, sessionIds, {}, groupKeys);
845
- if (index === 0) {
846
- funnelStartTime = dayjs(newEvent.time);
847
- delete newEvent.relativeTimeMs;
848
- return Promise.resolve(newEvent);
849
- }
850
- try {
851
- newEvent.time = dayjs(funnelStartTime).add(event.relativeTimeMs, "milliseconds").toISOString();
852
- delete newEvent.relativeTimeMs;
853
- return Promise.resolve(newEvent);
854
- }
855
- catch (e) {
856
- //shouldn't happen
857
- debugger;
858
- }
859
- }));
860
-
211
+ if (isCLI || config.verbose) {
212
+ console.log(` Creating ${groupCount.toLocaleString()} ${groupKey} profiles...`);
213
+ }
861
214
 
862
- await hook(finalEvents, "funnel-post", { user, profile, scd, funnel, config });
863
- return [finalEvents, doesUserConvert];
864
- }
215
+ // Get group-specific props if available
216
+ const specificGroupProps = groupProps[groupKey] || {};
865
217
 
866
- /**
867
- * a function that creates a profile (user or group)
868
- * @overload
869
- * @param {{[key: string]: ValueValid}} props
870
- * @param {{[key: string]: ValueValid}} [defaults]
871
- * @returns {Promise<UserProfile>}
872
- *
873
- * @overload
874
- * @param {{[key: string]: ValueValid}} props
875
- * @param {{[key: string]: ValueValid}} [defaults]
876
- * @returns {Promise<GroupProfile>}
877
- */
878
- async function makeProfile(props, defaults) {
879
- operations++;
880
- const keysToNotChoose = ["anonymousIds", "sessionIds"];
881
-
882
- const profile = {
883
- ...defaults,
884
- };
218
+ for (let j = 0; j < groupCount; j++) {
219
+ const groupProfile = await makeGroupProfile(context, groupKey, specificGroupProps, {
220
+ [groupKey]: `${groupKey}_${j + 1}`
221
+ });
885
222
 
886
- for (const key in profile) {
887
- if (keysToNotChoose.includes(key)) continue;
888
- try {
889
- profile[key] = u.choose(profile[key]);
890
- }
891
- catch (e) {
892
- // never gets here
893
- debugger;
223
+ await groupContainer.hookPush(groupProfile);
894
224
  }
895
225
  }
896
226
 
897
-
898
- for (const key in props) {
899
- try {
900
- profile[key] = u.choose(props[key]);
901
- } catch (e) {
902
- // never gets here
903
- debugger;
904
- }
227
+ if (isCLI || config.verbose) {
228
+ console.log('✅ Group profiles generated successfully');
905
229
  }
906
-
907
- return profile;
908
230
  }
909
231
 
910
232
  /**
911
- * @param {SCDProp} scdProp
912
- * @param {string} scdKey
913
- * @param {string} distinct_id
914
- * @param {number} mutations
915
- * @param {string} created
916
- * @return {Promise<SCDSchema[]>}
233
+ * Generate lookup tables for all configured lookup schemas
234
+ * @param {Context} context - Context object
917
235
  */
918
- async function makeSCD(scdProp, scdKey, distinct_id, mutations, created) {
919
- if (Array.isArray(scdProp)) scdProp = { values: scdProp, frequency: 'week', max: 10, timing: 'fuzzy', type: 'user' };
920
- const { frequency, max, timing, values, type = "user" } = scdProp;
921
- if (JSON.stringify(values) === "{}" || JSON.stringify(values) === "[]") return [];
922
- const scdEntries = [];
923
- let lastInserted = dayjs(created);
924
- const deltaDays = dayjs().diff(lastInserted, "day");
925
- const uuidKeyName = type === 'user' ? 'distinct_id' : type;
926
-
927
- for (let i = 0; i < mutations; i++) {
928
- if (lastInserted.isAfter(dayjs())) break;
929
- let scd = await makeProfile({ [scdKey]: values }, { [uuidKeyName]: distinct_id });
930
-
931
- // Explicitly constructing SCDSchema object with all required properties
932
- const scdEntry = {
933
- ...scd, // spread existing properties
934
- [uuidKeyName]: scd.distinct_id || distinct_id, // ensure distinct_id is set
935
- startTime: null,
936
- insertTime: null
937
- };
938
-
939
- if (timing === 'fixed') {
940
- if (frequency === "day") scdEntry.startTime = lastInserted.add(1, "day").startOf('day').toISOString();
941
- if (frequency === "week") scdEntry.startTime = lastInserted.add(1, "week").startOf('week').toISOString();
942
- if (frequency === "month") scdEntry.startTime = lastInserted.add(1, "month").startOf('month').toISOString();
943
- }
944
-
945
- if (timing === 'fuzzy') {
946
- scdEntry.startTime = lastInserted.toISOString();
947
- }
948
-
949
- const insertTime = lastInserted.add(u.integer(1, 9000), "seconds");
950
- scdEntry.insertTime = insertTime.toISOString();
236
+ async function generateLookupTables(context) {
237
+ const { config, storage } = context;
238
+ const { lookupTables } = config;
951
239
 
240
+ if (isCLI || config.verbose) {
241
+ console.log('\n🔍 Generating lookup tables...');
242
+ }
952
243
 
244
+ for (let i = 0; i < lookupTables.length; i++) {
245
+ const lookupConfig = lookupTables[i];
246
+ const { key, entries, attributes } = lookupConfig;
247
+ const lookupContainer = storage.lookupTableData[i];
953
248
 
954
- // Ensure TypeScript sees all required properties are set
955
- if (scdEntry.hasOwnProperty('insertTime') && scdEntry.hasOwnProperty('startTime')) {
956
- scdEntries.push(scdEntry);
249
+ if (!lookupContainer) {
250
+ console.warn(`Warning: No storage container found for lookup table: ${key}`);
251
+ continue;
957
252
  }
958
253
 
959
- //advance time for next entry
960
- lastInserted = lastInserted
961
- .add(u.integer(0, deltaDays), "day")
962
- .subtract(u.integer(1, 9000), "seconds");
963
- }
964
-
965
- //de-dupe on startTime
966
- const deduped = scdEntries.filter((entry, index, self) =>
967
- index === self.findIndex((t) => (
968
- t.startTime === entry.startTime
969
- ))
970
- );
971
- return deduped;
972
- }
254
+ if (isCLI || config.verbose) {
255
+ console.log(` Creating ${entries.toLocaleString()} ${key} lookup entries...`);
256
+ }
973
257
 
258
+ for (let j = 0; j < entries; j++) {
259
+ const lookupEntry = await makeProfile(context, attributes, {
260
+ [key]: `${key}_${j + 1}`
261
+ });
974
262
 
975
- /**
976
- * creates ad spend events for a given day for all campaigns in default campaigns
977
- * @param {string} day
978
- * @return {Promise<EventSchema[]>}
979
- */
980
- async function makeAdSpend(day, campaigns = CAMPAIGNS) {
981
- operations++;
982
- const chance = u.getChance();
983
- const adSpendEvents = [];
984
- for (const network of campaigns) {
985
- const campaigns = network.utm_campaign;
986
- loopCampaigns: for (const campaign of campaigns) {
987
- if (campaign === "$organic") continue loopCampaigns;
988
-
989
- const CAC = u.integer(42, 420); //todo: get the # of users created in this day from eventData
990
- // Randomly generating cost
991
- const cost = chance.floating({ min: 10, max: 250, fixed: 2 });
992
-
993
- // Ensuring realistic CPC and CTR
994
- const avgCPC = chance.floating({ min: 0.33, max: 2.00, fixed: 4 });
995
- const avgCTR = chance.floating({ min: 0.05, max: 0.25, fixed: 4 });
996
-
997
- // Deriving impressions from cost and avg CPC
998
- const clicks = Math.floor(cost / avgCPC);
999
- const impressions = Math.floor(clicks / avgCTR);
1000
- const views = Math.floor(impressions * avgCTR);
1001
-
1002
- //tags
1003
- const utm_medium = u.choose(u.pickAWinner(network.utm_medium)());
1004
- const utm_content = u.choose(u.pickAWinner(network.utm_content)());
1005
- const utm_term = u.choose(u.pickAWinner(network.utm_term)());
1006
- //each of these is a campaign
1007
- const id = network.utm_source[0] + '-' + campaign;
1008
- const uid = md5(id);
1009
- const adSpendEvent = {
1010
- event: "$ad_spend",
1011
- time: day,
1012
- source: 'dm4',
1013
- utm_campaign: campaign,
1014
- campaign_id: id,
1015
- insert_id: uid,
1016
- network: network.utm_source[0].toUpperCase(),
1017
- distinct_id: network.utm_source[0].toUpperCase(),
1018
- utm_source: network.utm_source[0],
1019
- utm_medium,
1020
- utm_content,
1021
- utm_term,
1022
-
1023
-
1024
- clicks,
1025
- views,
1026
- impressions,
1027
- cost,
1028
- date: dayjs(day).format("YYYY-MM-DD"),
1029
- };
1030
- adSpendEvents.push(adSpendEvent);
263
+ await lookupContainer.hookPush(lookupEntry);
1031
264
  }
265
+ }
1032
266
 
1033
-
267
+ if (isCLI || config.verbose) {
268
+ console.log('✅ Lookup tables generated successfully');
1034
269
  }
1035
- return adSpendEvents;
1036
270
  }
1037
271
 
1038
272
  /**
1039
- * takes event data and creates mirror datasets in a future state
1040
- * depending on the mirror strategy
1041
- * @param {Config} config
1042
- * @param {Storage} storage
1043
- * @return {Promise<void>}
273
+ * Generate SCDs for group entities
274
+ * @param {Context} context - Context object
1044
275
  */
1045
- async function makeMirror(config, storage) {
1046
- const { mirrorProps } = config;
1047
- const { eventData, mirrorEventData } = storage;
1048
- const now = dayjs();
1049
-
1050
- for (const oldEvent of eventData) {
1051
- let newEvent;
1052
- const eventTime = dayjs(oldEvent.time);
1053
- const delta = now.diff(eventTime, "day");
1054
-
1055
- for (const mirrorProp in mirrorProps) {
1056
- const prop = mirrorProps[mirrorProp];
1057
- const { daysUnfilled = 7, events = "*", strategy = "create", values = [] } = prop;
1058
- if (events === "*" || events.includes(oldEvent.event)) {
1059
- if (!newEvent) newEvent = clone(oldEvent);
1060
-
1061
- switch (strategy) {
1062
- case "create":
1063
- newEvent[mirrorProp] = u.choose(values);
1064
- break;
1065
- case "delete":
1066
- delete newEvent[mirrorProp];
1067
- break;
1068
- case "fill":
1069
- if (delta >= daysUnfilled) oldEvent[mirrorProp] = u.choose(values);
1070
- newEvent[mirrorProp] = u.choose(values);
1071
- break;
1072
- case "update":
1073
- if (!oldEvent[mirrorProp]) {
1074
- newEvent[mirrorProp] = u.choose(values);
1075
- }
1076
- else {
1077
- newEvent[mirrorProp] = oldEvent[mirrorProp];
1078
- }
1079
- break;
1080
- default:
1081
- throw new Error(`strategy ${strategy} is unknown`);
1082
- }
1083
-
1084
-
1085
- }
1086
- }
1087
-
1088
- const mirrorDataPoint = newEvent ? newEvent : oldEvent;
1089
- await mirrorEventData.hookPush(mirrorDataPoint);
276
+ async function generateGroupSCDs(context) {
277
+ const { config, storage } = context;
278
+ const { scdProps, groupKeys } = config;
1090
279
 
280
+ if (isCLI || config.verbose) {
281
+ console.log('\n📊 Generating group SCDs...');
1091
282
  }
1092
- }
1093
-
1094
-
1095
- /*
1096
- --------------
1097
- ORCHESTRATORS
1098
- --------------
1099
- */
1100
283
 
1101
-
1102
- /**
1103
- * a loop that creates users and their events; the loop is inside this function
1104
- * @param {Config} config
1105
- * @param {Storage} storage
1106
- * @param {number} [concurrency]
1107
- * @return {Promise<void>}
1108
- */
1109
- async function userLoop(config, storage, concurrency = 1) {
284
+ // Import utilities and generators
285
+ const { objFilter } = await import('ak-tools');
286
+ const { makeSCD } = await import('./lib/generators/scd.js');
287
+ const u = await import('./lib/utils/utils.js');
1110
288
  const chance = u.getChance();
1111
- const USER_CONN = pLimit(concurrency);
1112
- const {
1113
- verbose,
1114
- numUsers,
1115
- numEvents,
1116
- isAnonymous,
1117
- hasAvatar,
1118
- hasAnonIds,
1119
- hasSessionIds,
1120
- hasLocation,
1121
- funnels,
1122
- userProps,
1123
- scdProps,
1124
- numDays,
1125
- percentUsersBornInDataset = 5,
1126
- } = config;
1127
- const { eventData, userProfilesData, scdTableData } = storage;
1128
- const avgEvPerUser = numEvents / numUsers;
1129
- const startTime = Date.now();
1130
-
1131
- for (let i = 0; i < numUsers; i++) {
1132
- await USER_CONN(async () => {
1133
- userCount++;
1134
- const eps = Math.floor(eventCount / ((Date.now() - startTime) / 1000));
1135
- if (verbose) u.progress([["users", userCount], ["events", eventCount], ["eps", eps]]);
1136
- const userId = chance.guid();
1137
- const user = u.generateUser(userId, { numDays, isAnonymous, hasAvatar, hasAnonIds, hasSessionIds });
1138
- const { distinct_id, created } = user;
1139
- const userIsBornInDataset = chance.bool({ likelihood: percentUsersBornInDataset });
1140
- let numEventsPreformed = 0;
1141
- if (!userIsBornInDataset) delete user.created;
1142
- const adjustedCreated = userIsBornInDataset ? dayjs(created).subtract(daysShift, 'd') : dayjs.unix(global.FIXED_BEGIN);
1143
-
1144
- if (hasLocation) {
1145
- const location = u.shuffleArray(u.choose(DEFAULTS.locationsUsers)).pop();
1146
- for (const key in location) {
1147
- user[key] = location[key];
1148
- }
1149
- }
1150
289
 
1151
- // Profile creation
1152
- const profile = await makeProfile(userProps, user);
290
+ // Get only group SCDs (not user SCDs)
291
+ // @ts-ignore
292
+ const groupSCDProps = objFilter(scdProps, (scd) => scd.type && scd.type !== 'user');
1153
293
 
294
+ for (const [groupKey, groupCount] of groupKeys) {
295
+ // Filter SCDs that apply to this specific group key
296
+ // @ts-ignore
297
+ const groupSpecificSCDs = objFilter(groupSCDProps, (scd) => scd.type === groupKey);
1154
298
 
1155
- // SCD creation
1156
- const scdUserTables = t.objFilter(scdProps, (scd) => scd.type === 'user' || !scd.type);
1157
- const scdTableKeys = Object.keys(scdUserTables);
299
+ if (Object.keys(groupSpecificSCDs).length === 0) {
300
+ continue; // No SCDs for this group type
301
+ }
1158
302
 
303
+ if (isCLI || config.verbose) {
304
+ console.log(` Generating SCDs for ${groupCount.toLocaleString()} ${groupKey} entities...`);
305
+ }
1159
306
 
1160
- const userSCD = {};
1161
- for (const [index, key] of scdTableKeys.entries()) {
1162
- // @ts-ignore
1163
- const { max = 100 } = scdProps[key];
1164
- const mutations = chance.integer({ min: 1, max });
1165
- const changes = await makeSCD(scdProps[key], key, distinct_id, mutations, created);
1166
- userSCD[key] = changes;
1167
- await config.hook(changes, "scd-pre", { profile, type: 'user', scd: { [key]: scdProps[key] }, config, allSCDs: userSCD });
1168
- }
307
+ // Generate SCDs for each group entity
308
+ for (let i = 0; i < groupCount; i++) {
309
+ const groupId = `${groupKey}_${i + 1}`;
1169
310
 
1170
- let numEventsThisUserWillPreform = Math.floor(chance.normal({
1171
- mean: avgEvPerUser,
1172
- dev: avgEvPerUser / u.integer(u.integer(2, 5), u.integer(2, 7))
1173
- }) * 0.714159265359);
1174
-
1175
- // Power users and Shitty users logic...
1176
- chance.bool({ likelihood: 20 }) ? numEventsThisUserWillPreform *= 5 : null;
1177
- chance.bool({ likelihood: 15 }) ? numEventsThisUserWillPreform *= 0.333 : null;
1178
- numEventsThisUserWillPreform = Math.round(numEventsThisUserWillPreform);
1179
-
1180
- let userFirstEventTime;
1181
-
1182
- const firstFunnels = funnels.filter((f) => f.isFirstFunnel).reduce(u.weighFunnels, []);
1183
- const usageFunnels = funnels.filter((f) => !f.isFirstFunnel).reduce(u.weighFunnels, []);
1184
-
1185
- const secondsInDay = 86400;
1186
- const noise = () => chance.integer({ min: 0, max: secondsInDay });
1187
- let usersEvents = [];
1188
-
1189
- if (firstFunnels.length && userIsBornInDataset) {
1190
- const firstFunnel = chance.pickone(firstFunnels, user);
1191
-
1192
- const firstTime = adjustedCreated.subtract(noise(), 'seconds').unix();
1193
- const [data, userConverted] = await makeFunnel(firstFunnel, user, firstTime, profile, userSCD, config);
1194
- userFirstEventTime = dayjs(data[0].time).subtract(timeShift, 'seconds').unix();
1195
- numEventsPreformed += data.length;
1196
- // await eventData.hookPush(data, { profile });
1197
- usersEvents.push(...data);
1198
- if (!userConverted) {
1199
- if (verbose) u.progress([["users", userCount], ["events", eventCount]]);
1200
- return;
1201
- }
1202
- } else {
1203
- // userFirstEventTime = dayjs(created).unix();
1204
- // userFirstEventTime = global.FIXED_BEGIN;
1205
- userFirstEventTime = adjustedCreated.subtract(noise(), 'seconds').unix();
1206
- }
311
+ // Generate SCDs for this group entity
312
+ for (const [scdKey, scdConfig] of Object.entries(groupSpecificSCDs)) {
313
+ const { max = 10 } = scdConfig;
314
+ const mutations = chance.integer({ min: 1, max });
1207
315
 
1208
- while (numEventsPreformed < numEventsThisUserWillPreform) {
1209
- if (usageFunnels.length) {
1210
- const currentFunnel = chance.pickone(usageFunnels);
1211
- const [data, userConverted] = await makeFunnel(currentFunnel, user, userFirstEventTime, profile, userSCD, config);
1212
- numEventsPreformed += data.length;
1213
- usersEvents.push(...data);
1214
- // await eventData.hookPush(data, { profile });
1215
- } else {
1216
- const data = await makeEvent(distinct_id, userFirstEventTime, u.pick(config.events), user.anonymousIds, user.sessionIds, {}, config.groupKeys, true);
1217
- numEventsPreformed++;
1218
- usersEvents.push(data);
1219
- // await eventData.hookPush(data);
316
+ // Use a base time for the group entity (similar to user creation time)
317
+ const baseTime = context.FIXED_BEGIN || context.FIXED_NOW;
318
+ const changes = await makeSCD(context, scdConfig, scdKey, groupId, mutations, baseTime);
319
+
320
+ // Apply hook if configured
321
+ if (config.hook) {
322
+ await config.hook(changes, "scd-pre", {
323
+ type: 'group',
324
+ groupKey,
325
+ scd: { [scdKey]: scdConfig },
326
+ config
327
+ });
1220
328
  }
1221
- }
1222
329
 
1223
- // NOW ADD ALL OUR DATA FOR THIS USER
1224
- if (config.hook) {
1225
- const newEvents = await config.hook(usersEvents, "everything", { profile, scd: userSCD, config, userIsBornInDataset });
1226
- if (Array.isArray(newEvents)) usersEvents = newEvents;
1227
- }
1228
-
1229
- await userProfilesData.hookPush(profile);
1230
-
1231
- if (Object.keys(userSCD).length) {
1232
- for (const [key, changesArray] of Object.entries(userSCD)) {
1233
- for (const changes of changesArray) {
1234
- const target = scdTableData.filter(arr => arr.scdKey === key).pop();
1235
- await target.hookPush(changes, { profile, type: 'user' });
330
+ // Store SCDs in the appropriate SCD table
331
+ for (const change of changes) {
332
+ try {
333
+ const target = storage.scdTableData.filter(arr => arr.scdKey === scdKey).pop();
334
+ await target.hookPush(change, { type: 'group', groupKey });
335
+ } catch (e) {
336
+ // Fallback for tests
337
+ const target = storage.scdTableData[0];
338
+ await target.hookPush(change, { type: 'group', groupKey });
1236
339
  }
1237
340
  }
1238
341
  }
1239
- await eventData.hookPush(usersEvents, { profile });
1240
-
1241
-
1242
- if (verbose) u.progress([["users", userCount], ["events", eventCount]]);
1243
- });
342
+ }
1244
343
  }
1245
344
 
345
+ if (isCLI || config.verbose) {
346
+ console.log('✅ Group SCDs generated successfully');
347
+ }
1246
348
  }
1247
349
 
1248
-
1249
350
  /**
1250
- * sends the data to mixpanel
1251
- * todo: this needs attention
1252
- * @param {Config} config
1253
- * @param {Storage} storage
351
+ * Generate charts for data visualization
352
+ * @param {Context} context - Context object
1254
353
  */
1255
- async function sendToMixpanel(config, storage) {
1256
- const {
1257
- adSpendData,
1258
- eventData,
1259
- groupProfilesData,
1260
- lookupTableData,
1261
- mirrorEventData,
1262
- scdTableData,
1263
- userProfilesData,
1264
- groupEventData
1265
-
1266
- } = storage;
1267
- const { token, region, writeToDisk = true } = config;
1268
- const importResults = { events: {}, users: {}, groups: [] };
1269
-
1270
- /** @type {import('mixpanel-import').Creds} */
1271
- const creds = { token };
1272
- const { format } = config;
1273
- const mpImportFormat = format === "json" ? "jsonl" : "csv";
1274
- /** @type {import('mixpanel-import').Options} */
1275
- const commonOpts = {
1276
- region,
1277
- fixData: true,
1278
- verbose: false,
1279
- forceStream: true,
1280
- strict: true, //false,
1281
- epochEnd: dayjs().unix(), //is this chill?
1282
- dryRun: false,
1283
- abridged: false,
1284
- fixJson: true,
1285
- showProgress: NODE_ENV === "dev" ? true : false,
1286
- streamFormat: mpImportFormat
1287
- };
354
+ async function generateCharts(context) {
355
+ const { config, storage } = context;
1288
356
 
1289
- if (isCLI) commonOpts.showProgress = true;
357
+ if (config.makeChart && storage.eventData?.length > 0) {
358
+ const chartPath = typeof config.makeChart === 'string'
359
+ ? config.makeChart
360
+ : `./charts/${config.simulationName}-timeline.png`;
1290
361
 
362
+ await generateLineChart(storage.eventData, undefined, chartPath);
1291
363
 
1292
-
1293
- if (eventData || isBATCH_MODE) {
1294
- log(`importing events to mixpanel...\n`);
1295
- let eventDataToImport = clone(eventData);
1296
- if (isBATCH_MODE) {
1297
- const writeDir = eventData.getWriteDir();
1298
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1299
- eventDataToImport = files.filter(f => f.includes('-EVENTS-'));
1300
- }
1301
- const imported = await mp(creds, eventDataToImport, {
1302
- recordType: "event",
1303
- ...commonOpts,
1304
- });
1305
- log(`\tsent ${comma(imported.success)} events\n`);
1306
- importResults.events = imported;
1307
- }
1308
- if (userProfilesData || isBATCH_MODE) {
1309
- log(`importing user profiles to mixpanel...\n`);
1310
- let userProfilesToImport = clone(userProfilesData);
1311
- if (isBATCH_MODE) {
1312
- const writeDir = userProfilesData.getWriteDir();
1313
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1314
- userProfilesToImport = files.filter(f => f.includes('-USERS-'));
1315
- }
1316
- const imported = await mp(creds, userProfilesToImport, {
1317
- recordType: "user",
1318
- ...commonOpts,
1319
- });
1320
- log(`\tsent ${comma(imported.success)} user profiles\n`);
1321
- importResults.users = imported;
1322
- }
1323
- if (groupEventData || isBATCH_MODE) {
1324
- log(`importing ad spend data to mixpanel...\n`);
1325
- let adSpendDataToImport = clone(adSpendData);
1326
- if (isBATCH_MODE) {
1327
- const writeDir = adSpendData.getWriteDir();
1328
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1329
- adSpendDataToImport = files.filter(f => f.includes('-AD-SPEND-'));
1330
- }
1331
- const imported = await mp(creds, adSpendDataToImport, {
1332
- recordType: "event",
1333
- ...commonOpts,
1334
- });
1335
- log(`\tsent ${comma(imported.success)} ad spend events\n`);
1336
- importResults.adSpend = imported;
1337
- }
1338
- if (groupProfilesData || isBATCH_MODE) {
1339
- for (const groupEntity of groupProfilesData) {
1340
- const groupKey = groupEntity?.groupKey;
1341
- log(`importing ${groupKey} profiles to mixpanel...\n`);
1342
- let groupProfilesToImport = clone(groupEntity);
1343
- if (isBATCH_MODE) {
1344
- const writeDir = groupEntity.getWriteDir();
1345
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1346
- groupProfilesToImport = files.filter(f => f.includes(`-GROUPS-${groupKey}`));
1347
- }
1348
- const imported = await mp({ token, groupKey }, groupProfilesToImport, {
1349
- recordType: "group",
1350
- ...commonOpts,
1351
-
1352
- });
1353
- log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);
1354
-
1355
- importResults.groups.push(imported);
1356
- }
1357
- }
1358
-
1359
- if (groupEventData || isBATCH_MODE) {
1360
- log(`importing group events to mixpanel...\n`);
1361
- let groupEventDataToImport = clone(groupEventData);
1362
- if (isBATCH_MODE) {
1363
- const writeDir = groupEventData.getWriteDir();
1364
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1365
- groupEventDataToImport = files.filter(f => f.includes('-GROUP-EVENTS-'));
1366
- }
1367
- const imported = await mp(creds, groupEventDataToImport, {
1368
- recordType: "event",
1369
- ...commonOpts,
1370
- strict: false
1371
- });
1372
- log(`\tsent ${comma(imported.success)} group events\n`);
1373
- importResults.groupEvents = imported;
1374
- }
1375
- const { serviceAccount, projectId, serviceSecret } = config;
1376
- if (serviceAccount && projectId && serviceSecret) {
1377
- if (scdTableData || isBATCH_MODE) {
1378
- log(`importing SCD data to mixpanel...\n`);
1379
- for (const scdEntity of scdTableData) {
1380
- const scdKey = scdEntity?.scdKey;
1381
- log(`importing ${scdKey} SCD data to mixpanel...\n`);
1382
- let scdDataToImport = clone(scdEntity);
1383
- if (isBATCH_MODE) {
1384
- const writeDir = scdEntity.getWriteDir();
1385
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1386
- scdDataToImport = files.filter(f => f.includes(`-SCD-${scdKey}`));
1387
- }
1388
-
1389
- /** @type {import('mixpanel-import').Options} */
1390
- const options = {
1391
- recordType: "scd",
1392
- scdKey,
1393
- scdType: scdEntity.dataType,
1394
- scdLabel: `${scdKey}-scd`,
1395
- ...commonOpts,
1396
- };
1397
- if (scdEntity.entityType !== "user") options.groupKey = scdEntity.entityType;
1398
- const imported = await mp(
1399
- {
1400
- token,
1401
- acct: serviceAccount,
1402
- pass: serviceSecret,
1403
- project: projectId
1404
- },
1405
- scdDataToImport,
1406
- // @ts-ignore
1407
- options);
1408
- log(`\tsent ${comma(imported.success)} ${scdKey} SCD data\n`);
1409
- importResults[`${scdKey}_scd`] = imported;
1410
- }
1411
- }
1412
- }
1413
-
1414
- //if we are in batch mode, we need to delete the files
1415
- if (!writeToDisk && isBATCH_MODE) {
1416
- const writeDir = eventData?.getWriteDir() || userProfilesData?.getWriteDir();
1417
- const listDir = await ls(writeDir.split(path.basename(writeDir)).join(""));
1418
- const files = listDir.filter(f => f.includes('-EVENTS-') || f.includes('-USERS-') || f.includes('-AD-SPEND-') || f.includes('-GROUPS-') || f.includes('-GROUP-EVENTS-'));
1419
- for (const file of files) {
1420
- await rm(file);
364
+ if (isCLI || config.verbose) {
365
+ console.log(`📊 Chart generated: ${chartPath}`);
366
+ } else {
367
+ sLog("Chart generated", { path: chartPath });
1421
368
  }
1422
369
  }
1423
- return importResults;
1424
370
  }
1425
371
 
1426
- /*
1427
- ----
1428
- META
1429
- ----
1430
- */
1431
-
1432
-
1433
372
  /**
1434
- * ensures that the config is valid and has all the necessary fields
1435
- * also adds some defaults
1436
- * @param {Config} config
373
+ * Flush all storage containers to disk
374
+ * @param {import('./types').Storage} storage - Storage containers
375
+ * @param {import('./types').Dungeon} config - Configuration object
1437
376
  */
1438
- function validateDungeonConfig(config) {
1439
- const chance = u.getChance();
1440
- let {
1441
- seed,
1442
- numEvents = 100_000,
1443
- numUsers = 1000,
1444
- numDays = 30,
1445
- epochStart = 0,
1446
- epochEnd = dayjs().unix(),
1447
- events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }],
1448
- superProps = { luckyNumber: [2, 2, 4, 4, 42, 42, 42, 2, 2, 4, 4, 42, 42, 42, 420] },
1449
- funnels = [],
1450
- userProps = {
1451
- spiritAnimal: chance.animal.bind(chance),
1452
- },
1453
- scdProps = {},
1454
- mirrorProps = {},
1455
- groupKeys = [],
1456
- groupProps = {},
1457
- lookupTables = [],
1458
- hasAnonIds = false,
1459
- hasSessionIds = false,
1460
- format = "csv",
1461
- token = null,
1462
- region = "US",
1463
- writeToDisk = false,
1464
- verbose = false,
1465
- makeChart = false,
1466
- soup = {},
1467
- hook = (record) => record,
1468
- hasAdSpend = false,
1469
- hasCampaigns = false,
1470
- hasLocation = false,
1471
- hasAvatar = false,
1472
- isAnonymous = false,
1473
- hasBrowser = false,
1474
- hasAndroidDevices = false,
1475
- hasDesktopDevices = false,
1476
- hasIOSDevices = false,
1477
- alsoInferFunnels = false,
1478
- name = "",
1479
- batchSize = 500_000,
1480
- concurrency = 500
1481
- } = config;
1482
-
1483
- //ensuring default for deep objects
1484
- if (!config.superProps) config.superProps = superProps;
1485
- if (!config.userProps || Object.keys(config?.userProps)) config.userProps = userProps;
1486
-
1487
- //setting up "TIME"
1488
- if (epochStart && !numDays) numDays = dayjs.unix(epochEnd).diff(dayjs.unix(epochStart), "day");
1489
- if (!epochStart && numDays) epochStart = dayjs.unix(epochEnd).subtract(numDays, "day").unix();
1490
- if (epochStart && numDays) { } //noop
1491
- if (!epochStart && !numDays) debugger; //never happens
1492
-
1493
- config.simulationName = name || makeName();
1494
- config.name = config.simulationName;
1495
-
1496
- //events
1497
- if (!events || !events.length) events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }];
1498
- // @ts-ignore
1499
- if (typeof events[0] === "string") events = events.map(e => ({ event: e }));
1500
-
1501
- //max batch size
1502
- if (batchSize > 0) BATCH_SIZE = batchSize;
377
+ async function flushStorageToDisk(storage, config) {
378
+ if (isCLI || config.verbose) {
379
+ console.log('\n💾 Writing data to disk...');
380
+ }
1503
381
 
1504
- // funnels
382
+ const flushPromises = [];
1505
383
 
1506
- // FUNNEL INFERENCE
1507
- // if (!funnels || !funnels.length) {
1508
- // funnels = inferFunnels(events);
1509
- // }
384
+ // Flush single HookedArray containers
385
+ if (storage.eventData?.flush) flushPromises.push(storage.eventData.flush());
386
+ if (storage.userProfilesData?.flush) flushPromises.push(storage.userProfilesData.flush());
387
+ if (storage.adSpendData?.flush) flushPromises.push(storage.adSpendData.flush());
388
+ if (storage.mirrorEventData?.flush) flushPromises.push(storage.mirrorEventData.flush());
389
+ if (storage.groupEventData?.flush) flushPromises.push(storage.groupEventData.flush());
1510
390
 
1511
- if (alsoInferFunnels) {
1512
- const inferredFunnels = inferFunnels(events);
1513
- funnels = [...funnels, ...inferredFunnels];
1514
- }
391
+ // Flush arrays of HookedArrays
392
+ [storage.scdTableData, storage.groupProfilesData, storage.lookupTableData].forEach(arrayOfContainers => {
393
+ if (Array.isArray(arrayOfContainers)) {
394
+ arrayOfContainers.forEach(container => {
395
+ if (container?.flush) flushPromises.push(container.flush());
396
+ });
397
+ }
398
+ });
1515
399
 
400
+ await Promise.all(flushPromises);
1516
401
 
1517
- const eventContainedInFunnels = Array.from(funnels.reduce((acc, f) => {
1518
- const events = f.sequence;
1519
- events.forEach(event => acc.add(event));
1520
- return acc;
1521
- }, new Set()));
1522
-
1523
- const eventsNotInFunnels = events
1524
- .filter(e => !e.isFirstEvent)
1525
- .filter(e => !eventContainedInFunnels.includes(e.event)).map(e => e.event);
1526
- if (eventsNotInFunnels.length) {
1527
- // const biggestWeight = funnels.reduce((acc, f) => {
1528
- // if (f.weight > acc) return f.weight;
1529
- // return acc;
1530
- // }, 0);
1531
- // const smallestWeight = funnels.reduce((acc, f) => {
1532
- // if (f.weight < acc) return f.weight;
1533
- // return acc;
1534
- // }, 0);
1535
- // const weight = u.integer(smallestWeight, biggestWeight) * 2;
1536
-
1537
- const sequence = u.shuffleArray(eventsNotInFunnels.flatMap(event => {
1538
- const evWeight = config.events.find(e => e.event === event)?.weight || 1;
1539
- return Array(evWeight).fill(event);
1540
- }));
1541
-
1542
-
1543
-
1544
- funnels.push({
1545
- sequence,
1546
- conversionRate: 50,
1547
- order: 'random',
1548
- timeToConvert: 24 * 14,
1549
- requireRepeats: false,
1550
- });
402
+ if (isCLI || config.verbose) {
403
+ console.log('✅ Data flushed to disk successfully');
1551
404
  }
1552
-
1553
- config.concurrency = concurrency;
1554
- config.funnels = funnels;
1555
- config.batchSize = batchSize;
1556
- config.seed = seed;
1557
- config.numEvents = numEvents;
1558
- config.numUsers = numUsers;
1559
- config.numDays = numDays;
1560
- config.epochStart = epochStart;
1561
- config.epochEnd = epochEnd;
1562
- config.events = events;
1563
- config.superProps = superProps;
1564
- config.funnels = funnels;
1565
- config.userProps = userProps;
1566
- config.scdProps = scdProps;
1567
- config.mirrorProps = mirrorProps;
1568
- config.groupKeys = groupKeys;
1569
- config.groupProps = groupProps;
1570
- config.lookupTables = lookupTables;
1571
- config.hasAnonIds = hasAnonIds;
1572
- config.hasSessionIds = hasSessionIds;
1573
- config.format = format;
1574
- config.token = token;
1575
- config.region = region;
1576
- config.writeToDisk = writeToDisk;
1577
- config.verbose = verbose;
1578
- config.makeChart = makeChart;
1579
- config.soup = soup;
1580
- config.hook = hook;
1581
- config.hasAdSpend = hasAdSpend;
1582
- config.hasCampaigns = hasCampaigns;
1583
- config.hasLocation = hasLocation;
1584
- config.hasAvatar = hasAvatar;
1585
- config.isAnonymous = isAnonymous;
1586
- config.hasBrowser = hasBrowser;
1587
- config.hasAndroidDevices = hasAndroidDevices;
1588
- config.hasDesktopDevices = hasDesktopDevices;
1589
- config.hasIOSDevices = hasIOSDevices;
1590
-
1591
- //event validation
1592
- const validatedEvents = u.validateEventConfig(events);
1593
- events = validatedEvents;
1594
- config.events = validatedEvents;
1595
-
1596
- return config;
1597
405
  }
1598
406
 
1599
- /**
1600
- * our meta programming function which lets you mutate items as they are pushed into an array
1601
- * it also does batching and writing to disk
1602
- * it kind of is a class - as it produces new objects - but it's not a class
1603
- * @param {Object} arr
1604
- * @param {hookArrayOptions} opts
1605
- * @returns {Promise<hookArray>}
407
+ /**
408
+ * Extract file information from storage containers
409
+ * @param {import('./types').Storage} storage - Storage object
410
+ * @returns {string[]} Array of file paths
1606
411
  */
1607
- async function makeHookArray(arr = [], opts = {}) {
1608
- const { hook = a => a, type = "", filepath = "./defaultFile", format = "csv", concurrency = 1, ...rest } = opts;
1609
- const FILE_CONN = pLimit(concurrency); // concurrent file writes
1610
- let batch = 0;
1611
- let writeDir;
1612
- const dataFolder = path.resolve("./data");
1613
- if (existsSync(dataFolder)) writeDir = dataFolder;
1614
- else writeDir = path.resolve("./");
1615
-
1616
- // ! decide where to write the files in prod
1617
- if (NODE_ENV === "prod") {
1618
- writeDir = path.resolve(os.tmpdir());
1619
- }
1620
- if (typeof rest?.config?.writeToDisk === "string" && rest?.config?.writeToDisk?.startsWith('gs://')) {
1621
- writeDir = rest.config.writeToDisk;
1622
- }
1623
-
1624
- function getWritePath() {
1625
- if (isBATCH_MODE) {
1626
- if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}-part-${batch.toString()}.${format}`;
1627
- return path.join(writeDir, `${filepath}-part-${batch.toString()}.${format}`);
1628
- }
1629
- else {
1630
- if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}.${format}`;
1631
- return path.join(writeDir, `${filepath}.${format}`);
1632
- }
1633
- }
1634
-
1635
- function getWriteDir() {
1636
- return path.join(writeDir, `${filepath}.${format}`);
1637
- }
1638
-
1639
- async function transformThenPush(item, meta) {
1640
- if (item === null || item === undefined) return false;
1641
- if (typeof item === 'object' && Object.keys(item).length === 0) return false;
1642
- const allMetaData = { ...rest, ...meta };
1643
- if (Array.isArray(item)) {
1644
- for (const i of item) {
1645
- try {
1646
- const enriched = await hook(i, type, allMetaData);
1647
- if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
1648
- else arr.push(enriched);
1649
- } catch (e) {
1650
- console.error(`\n\nyour hook had an error\n\n`, e);
1651
- arr.push(i);
412
+ function extractFileInfo(storage) {
413
+ const files = [];
414
+
415
+ Object.values(storage).forEach(container => {
416
+ if (Array.isArray(container)) {
417
+ container.forEach(subContainer => {
418
+ if (subContainer?.getWritePath) {
419
+ files.push(subContainer.getWritePath());
1652
420
  }
1653
- }
1654
- } else {
1655
- try {
1656
- const enriched = await hook(item, type, allMetaData);
1657
- if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
1658
- else arr.push(enriched);
1659
- } catch (e) {
1660
- console.error(`\n\nyour hook had an error\n\n`, e);
1661
- arr.push(item);
1662
- }
1663
- }
1664
-
1665
- if (arr.length > BATCH_SIZE) {
1666
- isBATCH_MODE = true;
1667
- batch++;
1668
- const writePath = getWritePath();
1669
- const writeResult = await FILE_CONN(() => writeToDisk(arr, { writePath }));
1670
- return writeResult;
1671
- } else {
1672
- return Promise.resolve(false);
1673
- }
1674
- }
1675
-
1676
- async function writeToDisk(data, options) {
1677
- const { writePath } = options;
1678
- let writeResult;
1679
- if (VERBOSE) log(`\n\n\twriting ${writePath}\n\n`);
1680
- switch (format) {
1681
- case "csv":
1682
- writeResult = await u.streamCSV(writePath, data);
1683
- break;
1684
- case "json":
1685
- writeResult = await u.streamJSON(writePath, data);
1686
- break;
1687
- default:
1688
- throw new Error(`format ${format} is not supported`);
1689
- }
1690
- if (isBATCH_MODE) data.length = 0;
1691
- return writeResult;
1692
-
1693
- }
1694
-
1695
- async function flush() {
1696
- if (arr.length > 0) {
1697
- batch++;
1698
- const writePath = getWritePath();
1699
- await FILE_CONN(() => writeToDisk(arr, { writePath }));
1700
- if (isBATCH_MODE) arr.length = 0; // free up memory for batch mode
421
+ });
422
+ } else if (container?.getWritePath) {
423
+ files.push(container.getWritePath());
1701
424
  }
1702
- }
1703
-
1704
- const enrichedArray = arr;
1705
-
1706
- enrichedArray.hookPush = transformThenPush;
1707
- enrichedArray.flush = flush;
1708
- enrichedArray.getWriteDir = getWriteDir;
1709
- enrichedArray.getWritePath = getWritePath;
1710
-
1711
- for (const key in rest) {
1712
- enrichedArray[key.toString()] = rest[key];
1713
- }
425
+ });
1714
426
 
1715
- return enrichedArray;
427
+ return files;
1716
428
  }
1717
429
 
1718
-
1719
430
  /**
1720
- * create funnels out of random events
1721
- * @param {EventConfig[]} events
431
+ * Extract data from storage containers, preserving array structure for groups/lookups/SCDs
432
+ * @param {import('./types').Storage} storage - Storage object
433
+ * @returns {object} Extracted data in Result format
1722
434
  */
1723
- function inferFunnels(events) {
1724
- const createdFunnels = [];
1725
- const firstEvents = events.filter((e) => e.isFirstEvent).map((e) => e.event);
1726
- const usageEvents = events.filter((e) => !e.isFirstEvent).map((e) => e.event);
1727
- const numFunnelsToCreate = Math.ceil(usageEvents.length);
1728
- /** @type {Funnel} */
1729
- const funnelTemplate = {
1730
- sequence: [],
1731
- conversionRate: 50,
1732
- order: 'sequential',
1733
- requireRepeats: false,
1734
- props: {},
1735
- timeToConvert: 1,
1736
- isFirstFunnel: false,
1737
- weight: 1
435
+ function extractStorageData(storage) {
436
+ return {
437
+ eventData: storage.eventData || [],
438
+ mirrorEventData: storage.mirrorEventData || [],
439
+ userProfilesData: storage.userProfilesData || [],
440
+ adSpendData: storage.adSpendData || [],
441
+ // Keep arrays of HookedArrays as separate arrays (don't flatten)
442
+ scdTableData: storage.scdTableData || [],
443
+ groupProfilesData: storage.groupProfilesData || [],
444
+ lookupTableData: storage.lookupTableData || []
1738
445
  };
1739
- if (firstEvents.length) {
1740
- for (const event of firstEvents) {
1741
- createdFunnels.push({ ...clone(funnelTemplate), sequence: [event], isFirstFunnel: true, conversionRate: 100 });
1742
- }
1743
- }
1744
-
1745
- //at least one funnel with all usage events
1746
- createdFunnels.push({ ...clone(funnelTemplate), sequence: usageEvents });
1747
-
1748
- //for the rest, make random funnels
1749
- followUpFunnels: for (let i = 1; i < numFunnelsToCreate; i++) {
1750
- /** @type {Funnel} */
1751
- const funnel = { ...clone(funnelTemplate) };
1752
- funnel.conversionRate = u.integer(25, 75);
1753
- funnel.timeToConvert = u.integer(1, 10);
1754
- funnel.weight = u.integer(1, 10);
1755
- const sequence = u.shuffleArray(usageEvents).slice(0, u.integer(2, usageEvents.length));
1756
- funnel.sequence = sequence;
1757
- funnel.order = 'random';
1758
- createdFunnels.push(funnel);
1759
- }
1760
-
1761
- return createdFunnels;
1762
-
1763
446
  }
1764
447
 
1765
-
1766
- /*
1767
- ----
1768
- CLI
1769
- ----
1770
- */
1771
-
1772
- if (NODE_ENV !== "prod") {
1773
- if (require.main === module) {
1774
- isCLI = true;
1775
- const args = /** @type {Config} */ (getCliParams());
1776
- let { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false, hasSessionIds, hasAnonIds } = args;
1777
- const suppliedConfig = args._[0];
1778
-
1779
- //if the user specifies an separate config file
1780
- let config = null;
1781
- if (suppliedConfig) {
1782
- console.log(`using ${suppliedConfig} for data\n`);
1783
- config = require(path.resolve(suppliedConfig));
1784
- }
1785
- else {
1786
- if (complex) {
1787
- console.log(`... using default COMPLEX configuration [everything] ...\n`);
1788
- console.log(`... for more simple data, don't use the --complex flag ...\n`);
1789
- console.log(`... or specify your own js config file (see docs or --help) ...\n`);
1790
- config = require(path.resolve(__dirname, "./dungeons/complex.js"));
1791
- }
1792
- else {
1793
- console.log(`... using default SIMPLE configuration [events + users] ...\n`);
1794
- console.log(`... for more complex data, use the --complex flag ...\n`);
1795
- config = require(path.resolve(__dirname, "./dungeons/simple.js"));
1796
- }
1797
- }
1798
-
1799
- //override config with cli params
1800
- if (token) config.token = token;
1801
- if (seed) config.seed = seed;
1802
- if (format === "csv" && config.format === "json") format = "json";
1803
- if (format) config.format = format;
1804
- if (numDays) config.numDays = numDays;
1805
- if (numUsers) config.numUsers = numUsers;
1806
- if (numEvents) config.numEvents = numEvents;
1807
- if (region) config.region = region;
1808
- if (writeToDisk) config.writeToDisk = writeToDisk;
1809
- if (writeToDisk === 'false') config.writeToDisk = false;
1810
- if (hasSessionIds) config.hasSessionIds = hasSessionIds;
1811
- if (hasAnonIds) config.hasAnonIds = hasAnonIds;
1812
- config.verbose = true;
1813
-
1814
- main(config)
1815
- .then((data) => {
1816
- log(`-----------------SUMMARY-----------------`);
1817
- const d = { success: 0, bytes: 0 };
1818
- const darr = [d];
1819
- const { events = d, groups = darr, users = d } = data?.importResults || {};
1820
- const files = data.files;
1821
- const folder = files?.[0]?.split(path.basename(files?.[0]))?.shift() || "./";
1822
- const groupBytes = groups.reduce((acc, group) => {
1823
- return acc + group.bytes;
1824
- }, 0);
1825
- const groupSuccess = groups.reduce((acc, group) => {
1826
- return acc + group.success;
1827
- }, 0);
1828
- const bytes = events.bytes + groupBytes + users.bytes;
1829
- const stats = {
1830
- events: comma(events.success || 0),
1831
- users: comma(users.success || 0),
1832
- groups: comma(groupSuccess || 0),
1833
- bytes: bytesHuman(bytes || 0),
1834
- };
1835
- if (bytes > 0) console.table(stats);
1836
- if (Object.keys(data?.importResults || {}).length) {
1837
- log(`\nlog written to log.json\n`);
1838
- writeFileSync(path.resolve(folder, "log.json"), JSON.stringify(data?.importResults, null, 2));
448
+ // CLI execution
449
+ if (isCLI) {
450
+ (async () => {
451
+ const cliConfig = getCliParams();
452
+
453
+ // Load dungeon config if --complex or --simple flags are used
454
+ let finalConfig = cliConfig;
455
+ if (cliConfig.complex) {
456
+ const complexConfig = await import('./dungeons/complex.js');
457
+ finalConfig = { ...complexConfig.default, ...cliConfig };
458
+ } else if (cliConfig.simple) {
459
+ const simpleConfig = await import('./dungeons/simple.js');
460
+ finalConfig = { ...simpleConfig.default, ...cliConfig };
461
+ }
462
+
463
+ main(finalConfig)
464
+ .then(result => {
465
+ console.log(`📊 Generated ${(result.eventCount || 0).toLocaleString()} events for ${(result.userCount || 0).toLocaleString()} users`);
466
+ console.log(`⏱️ Total time: ${result.time?.human || 'unknown'}`);
467
+ if (result.files?.length) {
468
+ console.log(`📁 Files written: ${result.files.length}`);
469
+ if (cliConfig.verbose) {
470
+ result.files.forEach(file => console.log(` ${file}`));
471
+ }
1839
472
  }
1840
- // log(" " + files?.flat().join("\n "));
1841
- log(`\n----------------SUMMARY-----------------\n\n\n`);
1842
- })
1843
- .catch((e) => {
1844
- log(`------------------ERROR------------------`);
1845
- console.error(e);
1846
- log(`------------------ERROR------------------`);
1847
- debugger;
473
+ console.log(`\n✅ Job completed successfully!`);
474
+ process.exit(0);
1848
475
  })
1849
- .finally(() => {
1850
- log("enjoy your data! :)");
476
+ .catch(error => {
477
+ console.error(`\n❌ Job failed: ${error.message}`);
478
+ if (cliConfig.verbose) {
479
+ console.error(error.stack);
480
+ }
481
+ process.exit(1);
1851
482
  });
1852
- } else {
1853
- main.generators = { makeEvent, makeFunnel, makeProfile, makeSCD, makeAdSpend, makeMirror };
1854
- main.orchestrators = { userLoop, validateDungeonConfig, sendToMixpanel };
1855
- main.meta = { inferFunnels, hookArray: makeHookArray };
1856
- module.exports = main;
1857
- }
1858
- }
1859
-
1860
-
1861
-
1862
- /*
1863
- ----
1864
- HELPERS
1865
- ----
1866
- */
1867
-
1868
- function log(...args) {
1869
- const cwd = process.cwd(); // Get the current working directory
1870
-
1871
- for (let i = 0; i < args.length; i++) {
1872
- // Replace occurrences of the current working directory with "./" in string arguments
1873
- if (typeof args[i] === 'string') {
1874
- args[i] = args[i].replace(new RegExp(cwd, 'g'), ".");
1875
- }
1876
- }
1877
- if (VERBOSE) console.log(...args);
483
+ })();
1878
484
  }
1879
485
 
1880
- function track(name, props, ...rest) {
1881
- if (process.env.NODE_ENV === 'test') return;
1882
- metrics(name, props, ...rest);
1883
- }
486
+ // Cloud Functions setup
487
+ functions.http('entry', async (req, res) => {
488
+ await handleCloudFunctionEntry(req, res, main);
489
+ });
1884
490
 
491
+ // ES Module export
492
+ export default main;
1885
493
 
1886
- /** @typedef {import('./types.js').Dungeon} Config */
1887
- /** @typedef {import('./types.js').AllData} AllData */
1888
- /** @typedef {import('./types.js').EventConfig} EventConfig */
1889
- /** @typedef {import('./types.js').Funnel} Funnel */
1890
- /** @typedef {import('./types.js').Person} Person */
1891
- /** @typedef {import('./types.js').SCDSchema} SCDSchema */
1892
- /** @typedef {import('./types.js').UserProfile} UserProfile */
1893
- /** @typedef {import('./types.js').EventSchema} EventSchema */
1894
- /** @typedef {import('./types.js').Storage} Storage */
1895
- /** @typedef {import('./types.js').Result} Result */
1896
- /** @typedef {import('./types.js').ValueValid} ValueValid */
1897
- /** @typedef {import('./types.js').HookedArray} hookArray */
1898
- /** @typedef {import('./types.js').hookArrayOptions} hookArrayOptions */
1899
- /** @typedef {import('./types.js').GroupProfileSchema} GroupProfile */
1900
- /** @typedef {import('./types.js').SCDProp} SCDProp */
494
+ // CommonJS compatibility
495
+ if (typeof module !== 'undefined' && module.exports) {
496
+ module.exports = main;
497
+ }