make-mp-data 1.5.56 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.claude/settings.local.json +20 -0
  2. package/.gcloudignore +2 -1
  3. package/.vscode/launch.json +6 -17
  4. package/.vscode/settings.json +31 -2
  5. package/dungeons/media.js +371 -0
  6. package/index.js +353 -1766
  7. package/{components → lib/cli}/cli.js +21 -6
  8. package/lib/cloud-function.js +20 -0
  9. package/lib/core/config-validator.js +248 -0
  10. package/lib/core/context.js +180 -0
  11. package/lib/core/storage.js +268 -0
  12. package/{components → lib/data}/defaults.js +17 -14
  13. package/lib/generators/adspend.js +133 -0
  14. package/lib/generators/events.js +242 -0
  15. package/lib/generators/funnels.js +330 -0
  16. package/lib/generators/mirror.js +168 -0
  17. package/lib/generators/profiles.js +93 -0
  18. package/lib/generators/scd.js +102 -0
  19. package/lib/orchestrators/mixpanel-sender.js +222 -0
  20. package/lib/orchestrators/user-loop.js +194 -0
  21. package/lib/orchestrators/worker-manager.js +200 -0
  22. package/{components → lib/utils}/ai.js +8 -36
  23. package/{components → lib/utils}/chart.js +9 -9
  24. package/{components → lib/utils}/project.js +4 -4
  25. package/{components → lib/utils}/utils.js +35 -23
  26. package/package.json +15 -15
  27. package/scripts/dana.mjs +137 -0
  28. package/scripts/new-dungeon.sh +7 -6
  29. package/scripts/update-deps.sh +2 -1
  30. package/tests/cli.test.js +28 -25
  31. package/tests/e2e.test.js +38 -36
  32. package/tests/int.test.js +151 -56
  33. package/tests/testSoup.mjs +1 -1
  34. package/tests/unit.test.js +15 -14
  35. package/tsconfig.json +1 -1
  36. package/types.d.ts +68 -11
  37. package/vitest.config.js +47 -0
  38. package/log.json +0 -1678
  39. package/tests/jest.config.js +0 -47
  40. package/{components → lib/utils}/prompt.txt +0 -0
package/index.js CHANGED
@@ -1,1910 +1,497 @@
1
1
  #! /usr/bin/env node
2
2
 
3
- /*
4
- make fake mixpanel data easily!
5
- by AK
6
- ak@mixpanel.com
7
- */
8
-
9
- //todo: churn implementation
10
- //todo: regular interval events (like 'card charged')
11
- //todo: SCDs send to mixpanel
12
- //todo: decent 'new dungeon' workflow
13
- //todo: validation that funnel events exist
14
- //todo: ability to catch events not in funnels and make them random...
15
-
16
-
17
- //TIME
18
- const dayjs = require("dayjs");
19
- const utc = require("dayjs/plugin/utc");
3
+ /**
4
+ * make-mp-data: Generate realistic Mixpanel data for testing and demos
5
+ * Modular, scalable data generation with support for events, users, funnels, SCDs, and more
6
+ *
7
+ * @author AK <ak@mixpanel.com>
8
+ * @version 3.0.0
9
+ */
10
+
11
+ /** @typedef {import('./types').Dungeon} Config */
12
+ /** @typedef {import('./types').Storage} Storage */
13
+ /** @typedef {import('./types').Result} Result */
14
+ /** @typedef {import('./types').Context} Context */
15
+
16
+ // Core modules
17
+ import { createContext, updateContextWithStorage } from './lib/core/context.js';
18
+ import { validateDungeonConfig } from './lib/core/config-validator.js';
19
+ import { StorageManager } from './lib/core/storage.js';
20
+
21
+ // Orchestrators
22
+ import { userLoop } from './lib/orchestrators/user-loop.js';
23
+ import { sendToMixpanel } from './lib/orchestrators/mixpanel-sender.js';
24
+ import { handleCloudFunctionEntry } from './lib/orchestrators/worker-manager.js';
25
+
26
+ // Generators
27
+ import { makeAdSpend } from './lib/generators/adspend.js';
28
+ import { makeMirror } from './lib/generators/mirror.js';
29
+ import { makeGroupProfile, makeProfile } from './lib/generators/profiles.js';
30
+
31
+ // Utilities
32
+ import getCliParams from './lib/cli/cli.js';
33
+ import * as u from './lib/utils/utils.js';
34
+ import { generateLineChart } from './lib/utils/chart.js';
35
+
36
+ // External dependencies
37
+ import dayjs from "dayjs";
38
+ import utc from "dayjs/plugin/utc.js";
39
+ import functions from '@google-cloud/functions-framework';
40
+ import { timer, sLog } from 'ak-tools';
41
+ import fs, { existsSync } from 'fs';
42
+
43
+ // Initialize dayjs and time constants
20
44
  dayjs.extend(utc);
21
45
  const FIXED_NOW = dayjs('2024-02-02').unix();
22
46
  global.FIXED_NOW = FIXED_NOW;
23
- // ^ this creates a FIXED POINT in time; we will shift it later
24
47
  let FIXED_BEGIN = dayjs.unix(FIXED_NOW).subtract(90, 'd').unix();
25
48
  global.FIXED_BEGIN = FIXED_BEGIN;
26
- const actualNow = dayjs().add(2, "day");
27
- const timeShift = actualNow.diff(dayjs.unix(FIXED_NOW), "seconds");
28
- const daysShift = actualNow.diff(dayjs.unix(FIXED_NOW), "days");
29
-
30
- // UTILS
31
- const { existsSync, writeFileSync } = require("fs");
32
- const pLimit = require('p-limit');
33
- const os = require("os");
34
- const path = require("path");
35
- const { comma, bytesHuman, makeName, md5, clone, tracker, uid, timer, ls, rm, touch, load, sLog } = require("ak-tools");
36
- const jobTimer = timer('job');
37
- const { generateLineChart } = require('./components/chart.js');
38
- const { version } = require('./package.json');
39
- const mp = require("mixpanel-import");
40
- const u = require("./components/utils.js");
41
- const getCliParams = require("./components/cli.js");
42
- const metrics = tracker("make-mp-data", "db99eb8f67ae50949a13c27cacf57d41", os.userInfo().username);
43
- const t = require('ak-tools');
44
-
45
-
46
- //CLOUD
47
- const functions = require('@google-cloud/functions-framework');
48
- const { GoogleAuth } = require('google-auth-library');
49
- const CONCURRENCY = 1_000;
50
- let RUNTIME_URL = "https://dm4-lmozz6xkha-uc.a.run.app"; // IMPORTANT: this is what allows the service to call itself
51
- // const functionName = process.env.FUNCTION_NAME || process.env.K_SERVICE;
52
-
53
- // const region = process.env.REGION; // Optionally, you can get the region too
54
- // const GCP_PROJECT = process.env.GCLOUD_PROJECT; // Project ID is also available as an environment variable
55
- // const isCloudFunction = !!process.env.FUNCTION_NAME || !!process.env.FUNCTION_TARGET;
56
- // if (isCloudFunction) {
57
- // RUNTIME_URL = `https://${region}-${GCP_PROJECT}.cloudfunctions.net/${functionName}`;
58
- // }
59
- // else {
60
- // RUNTIME_URL = `http://localhost:8080`;
61
- // }
62
-
63
-
64
-
65
- // DEFAULTS
66
- const { campaigns, devices, locations } = require('./components/defaults.js');
67
- let CAMPAIGNS;
68
- let DEFAULTS;
69
- /** @type {Storage} */
70
- let STORAGE;
71
- /** @type {Config} */
72
- let CONFIG;
73
- require('dotenv').config();
74
-
75
- const { NODE_ENV = "unknown" } = process.env;
76
-
77
-
78
-
79
-
80
- // RUN STATE
81
- let VERBOSE = false;
82
- let isCLI = false;
83
- // if we are running in batch mode, we MUST write to disk before we can send to mixpanel
84
- let isBATCH_MODE = false;
85
- let BATCH_SIZE = 1_000_000;
86
49
 
87
- //todo: these should be moved into the hookedArrays
88
- let operations = 0;
89
- let eventCount = 0;
90
- let userCount = 0;
50
+ // Package version
51
+ const { version } = JSON.parse(fs.readFileSync('./package.json', 'utf8'));
91
52
 
53
+ // Environment
54
+ const { NODE_ENV = "unknown" } = process.env;
55
+ const isCLI = process.argv[1].endsWith('index.js') || process.argv[1].endsWith('cli.js');
92
56
 
93
57
  /**
94
- * generates fake mixpanel data
95
- * @param {Config} config
58
+ * Main data generation function
59
+ * @param {Config} config - Configuration object for data generation
60
+ * @returns {Promise<Result>} Generated data and metadata
96
61
  */
97
62
  async function main(config) {
63
+ const jobTimer = timer('job');
98
64
  jobTimer.start();
99
- const seedWord = process.env.SEED || config.seed || "hello friend!";
100
- config.seed = seedWord;
101
- const chance = u.initChance(seedWord);
102
- //seed the random number generator, get it with getChance()
103
- // ^ this is critical; same seed = same data;
104
- // ^ seed can be passed in as an env var or in the config
105
- validateDungeonConfig(config);
106
- global.FIXED_BEGIN = dayjs.unix(FIXED_NOW).subtract(config.numDays, 'd').unix();
107
-
108
- //GLOBALS
109
- CONFIG = config;
110
- VERBOSE = config.verbose;
111
- CAMPAIGNS = campaigns;
112
- DEFAULTS = {
113
- locationsUsers: u.pickAWinner(clone(locations).map(l => { delete l.country; return l; }), 0),
114
- locationsEvents: u.pickAWinner(clone(locations).map(l => { delete l.country_code; return l; }), 0),
115
- iOSDevices: u.pickAWinner(devices.iosDevices, 0),
116
- androidDevices: u.pickAWinner(devices.androidDevices, 0),
117
- desktopDevices: u.pickAWinner(devices.desktopDevices, 0),
118
- browsers: u.pickAWinner(devices.browsers, 0),
119
- campaigns: u.pickAWinner(campaigns, 0),
120
- };
121
-
122
- if (config.singleCountry) {
123
- DEFAULTS.locationsEvents = u.pickAWinner(clone(locations)
124
- .filter(l => l.country === config.singleCountry)
125
- .map(l => { delete l.country; return l; }), 0);
126
-
127
- DEFAULTS.locationsUsers = u.pickAWinner(clone(locations)
128
- .filter(l => l.country === config.singleCountry)
129
- .map(l => { delete l.country_code; return l; }), 0);
130
- }
131
-
132
-
133
- //TRACKING
134
- const runId = uid(42);
135
- const { events, superProps, userProps, scdProps, groupKeys, groupProps, lookupTables, soup, hook, mirrorProps, token: source_proj_token, ...trackingParams } = config;
136
- let { funnels } = config;
137
- trackingParams.runId = runId;
138
- trackingParams.version = version;
139
- delete trackingParams.funnels;
140
-
141
- //STORAGE
142
- const { simulationName, format } = config;
143
- const eventData = await makeHookArray([], { hook, type: "event", config, format, filepath: `${simulationName}-EVENTS` });
144
- const userProfilesData = await makeHookArray([], { hook, type: "user", config, format, filepath: `${simulationName}-USERS` });
145
- const adSpendData = await makeHookArray([], { hook, type: "ad-spend", config, format, filepath: `${simulationName}-AD-SPEND` });
146
- const groupEventData = await makeHookArray([], { hook, type: "group-event", config, format, filepath: `${simulationName}-GROUP-EVENTS` });
147
-
148
- // SCDs, Groups, + Lookups may have multiple tables
149
- const scdTableKeys = Object.keys(scdProps);
150
- const scdTableData = await Promise.all(scdTableKeys.map(async (key) =>
151
- //todo don't assume everything is a string... lol
152
- // @ts-ignore
153
- await makeHookArray([], { hook, type: "scd", config, format, scdKey: key, entityType: config.scdProps[key]?.type || "user", dataType: "string", filepath: `${simulationName}-${scdProps[key]?.type || "user"}-SCD-${key}` })
154
- ));
155
- const groupTableKeys = Object.keys(groupKeys);
156
- const groupProfilesData = await Promise.all(groupTableKeys.map(async (key, index) => {
157
- const groupKey = groupKeys[index]?.slice()?.shift();
158
- return await makeHookArray([], { hook, type: "group", config, format, groupKey, filepath: `${simulationName}-GROUPS-${groupKey}` });
159
- }));
160
-
161
- const lookupTableKeys = Object.keys(lookupTables);
162
- const lookupTableData = await Promise.all(lookupTableKeys.map(async (key, index) => {
163
- const lookupKey = lookupTables[index].key;
164
- return await makeHookArray([], { hook, type: "lookup", config, format, lookupKey: lookupKey, filepath: `${simulationName}-LOOKUP-${lookupKey}` });
165
- }));
166
-
167
- const mirrorEventData = await makeHookArray([], { hook, type: "mirror", config, format, filepath: `${simulationName}-MIRROR` });
168
-
169
- STORAGE = {
170
- eventData,
171
- userProfilesData,
172
- scdTableData,
173
- groupProfilesData,
174
- lookupTableData,
175
- mirrorEventData,
176
- adSpendData,
177
- groupEventData
178
-
179
- };
180
-
181
65
 
182
- track('start simulation', trackingParams);
183
- log(`------------------SETUP------------------`);
184
- log(`\nyour data simulation will heretofore be known as: \n\n\t${config.simulationName.toUpperCase()}...\n`);
185
- log(`and your configuration is:\n\n`, JSON.stringify(trackingParams, null, 2));
186
- log(`------------------SETUP------------------`, "\n");
187
-
188
-
189
-
190
- //USERS
191
- log(`---------------SIMULATION----------------`, "\n\n");
192
- const { concurrency = 1 } = config;
193
- await userLoop(config, STORAGE, concurrency);
194
- const { hasAdSpend, epochStart, epochEnd } = config;
195
-
196
- // AD SPEND
197
- if (hasAdSpend) {
198
- const days = u.datesBetween(epochStart, epochEnd);
199
- for (const day of days) {
200
- const dailySpendData = await makeAdSpend(day);
201
- for (const spendEvent of dailySpendData) {
202
- await adSpendData.hookPush(spendEvent);
66
+ //cli mode check for positional dungeon config
67
+ if (isCLI) {
68
+ const firstArg = config._.slice().pop()
69
+ if (firstArg?.endsWith('.js') && existsSync(firstArg)) {
70
+ if (config.verbose) {
71
+ console.log(`\n🔍 Loading dungeon config from: ${firstArg}`);
72
+ }
73
+ try {
74
+ const dungeonConfig = await import(firstArg);
75
+ config = dungeonConfig.default || dungeonConfig;
76
+ } catch (error) {
77
+ console.error(`\n❌ Error loading dungeon config from ${firstArg}: ${error.message}`);
78
+ throw error;
203
79
  }
204
80
  }
205
-
81
+
206
82
  }
207
83
 
84
+ let validatedConfig;
85
+ try {
86
+ // Step 1: Validate and enrich configuration
87
+ validatedConfig = validateDungeonConfig(config);
208
88
 
209
- log("\n");
210
-
211
- //GROUP PROFILES
212
- const groupSCDs = t.objFilter(scdProps, (scd) => scd.type !== 'user');
213
- for (const [index, groupPair] of groupKeys.entries()) {
214
- const groupKey = groupPair[0];
215
- const groupCardinality = groupPair[1];
216
- for (let i = 1; i < groupCardinality + 1; i++) {
217
- if (VERBOSE) u.progress([["groups", i]]);
218
-
219
- const props = await makeProfile(groupProps[groupKey], { created: () => { return dayjs().subtract(u.integer(0, CONFIG.numDays || 30), 'd').toISOString(); } });
220
- const group = {
221
- [groupKey]: i,
222
- ...props,
223
- };
224
- group["distinct_id"] = i.toString();
225
- await groupProfilesData[index].hookPush(group);
226
-
227
- //SCDs
228
- const thisGroupSCD = t.objFilter(groupSCDs, (scd) => scd.type === groupKey);
229
- const groupSCDKeys = Object.keys(thisGroupSCD);
230
- const groupSCD = {};
231
- for (const [index, key] of groupSCDKeys.entries()) {
232
- const { max = 100 } = groupSCDs[key];
233
- const mutations = chance.integer({ min: 2, max });
234
- const changes = await makeSCD(scdProps[key], key, i.toString(), mutations, group.created);
235
- groupSCD[key] = changes;
236
- const scdTable = scdTableData
237
- .filter(hookArr => hookArr.scdKey === key);
238
-
239
- await config.hook(changes, 'scd-pre', { profile: group, type: groupKey, scd: { [key]: groupSCDs[key] }, config, allSCDs: groupSCD });
240
- await scdTable[0].hookPush(changes, { profile: group, type: groupKey });
241
- }
89
+ // Step 2: Create context with validated config
90
+ const context = createContext(validatedConfig);
242
91
 
92
+ // Step 3: Initialize storage containers
93
+ const storageManager = new StorageManager(context);
94
+ const storage = await storageManager.initializeContainers();
95
+ updateContextWithStorage(context, storage);
243
96
 
97
+ // Step 4: Generate ad spend data (if enabled)
98
+ if (validatedConfig.hasAdSpend) {
99
+ await generateAdSpendData(context);
244
100
  }
245
- }
246
- log("\n");
247
-
248
- //GROUP EVENTS
249
- if (config.groupEvents) {
250
- for (const groupEvent of config.groupEvents) {
251
- const { frequency, group_key, attribute_to_user, group_size, ...normalEvent } = groupEvent;
252
- for (const group_num of Array.from({ length: group_size }, (_, i) => i + 1)) {
253
- const groupProfile = groupProfilesData.find(groups => groups.groupKey === group_key).find(group => group[group_key] === group_num);
254
- const { created, distinct_id } = groupProfile;
255
- normalEvent[group_key] = distinct_id;
256
- const random_user_id = chance.pick(eventData.filter(a => a.user_id)).user_id;
257
- if (!random_user_id) debugger;
258
- const deltaDays = actualNow.diff(dayjs(created), "day");
259
- const numIntervals = Math.floor(deltaDays / frequency);
260
- const eventsForThisGroup = [];
261
- for (let i = 0; i < numIntervals; i++) {
262
- const event = await makeEvent(random_user_id, null, normalEvent, [], [], {}, [], false, true);
263
- if (!attribute_to_user) delete event.user_id;
264
- event[group_key] = distinct_id;
265
- event.time = dayjs(created).add(i * frequency, "day").toISOString();
266
- delete event.distinct_id;
267
- //always skip the first event
268
- if (i !== 0) {
269
- eventsForThisGroup.push(event);
270
- }
271
- }
272
- await groupEventData.hookPush(eventsForThisGroup, { profile: groupProfile });
273
- }
274
- }
275
- }
276
101
 
102
+ // Step 5: Main user and event generation
103
+ await userLoop(context);
277
104
 
278
- //LOOKUP TABLES
279
- for (const [index, lookupTable] of lookupTables.entries()) {
280
- const { key, entries, attributes } = lookupTable;
281
- for (let i = 1; i < entries + 1; i++) {
282
- if (VERBOSE) u.progress([["lookups", i]]);
283
- const props = await makeProfile(attributes);
284
- const item = {
285
- [key]: i,
286
- ...props,
287
- };
288
- await lookupTableData[index].hookPush(item);
105
+ // Step 6: Generate group profiles (if configured)
106
+ if (validatedConfig.groupKeys && validatedConfig.groupKeys.length > 0) {
107
+ await generateGroupProfiles(context);
289
108
  }
290
109
 
291
- }
292
- log("\n");
293
-
294
-
295
- // MIRROR
296
- if (Object.keys(mirrorProps).length) await makeMirror(config, STORAGE);
297
-
298
-
299
- log("\n");
300
- log(`---------------SIMULATION----------------`, "\n");
301
-
302
- // draw charts
303
- const { makeChart = false } = config;
304
- if (makeChart) {
305
- const bornEvents = config.events?.filter((e) => e?.isFirstEvent)?.map(e => e.event) || [];
306
- const bornFunnels = config.funnels?.filter((f) => f.isFirstFunnel)?.map(f => f.sequence[0]) || [];
307
- const bornBehaviors = [...bornEvents, ...bornFunnels];
308
- const chart = await generateLineChart(eventData, bornBehaviors, makeChart);
309
- }
310
- const { writeToDisk = true, token } = config;
311
- if (!writeToDisk && !token) {
312
- jobTimer.stop(false);
313
- const { start, end, delta, human } = jobTimer.report(false);
314
- // this is awkward, but i couldn't figure out any other way to assert a type in jsdoc
315
- const i = /** @type {any} */ (STORAGE);
316
- i.time = { start, end, delta, human };
317
- const j = /** @type {Result} */ (i);
318
- return j;
319
-
320
- }
321
-
322
- log(`-----------------WRITES------------------`, `\n\n`);
323
-
324
- // write to disk and/or send to mixpanel
325
- let files;
326
- if (writeToDisk) {
327
- for (const key in STORAGE) {
328
- const table = STORAGE[key];
329
- if (table.length && typeof table.flush === "function") {
330
- await table.flush();
331
- } else {
332
- if (Array.isArray(table) && typeof table[0]?.flush === "function") {
333
- for (const subTable of table) {
334
- await subTable.flush();
335
- }
336
- }
337
- }
110
+ // Step 7: Generate group SCDs (if configured)
111
+ if (validatedConfig.scdProps && validatedConfig.groupKeys && validatedConfig.groupKeys.length > 0) {
112
+ await generateGroupSCDs(context);
338
113
  }
339
- }
340
- let importResults;
341
- if (token) importResults = await sendToMixpanel(config, STORAGE);
342
-
343
-
344
- log(`\n-----------------WRITES------------------`, "\n");
345
- track('end simulation', trackingParams);
346
- jobTimer.stop(false);
347
- const { start, end, delta, human } = jobTimer.report(false);
348
-
349
- // if (process.env.NODE_ENV === 'dev')debugger;
350
- return {
351
- ...STORAGE,
352
- importResults,
353
- files,
354
- operations,
355
- eventCount,
356
- userCount,
357
- time: { start, end, delta, human },
358
- };
359
- }
360
114
 
361
- functions.http('entry', async (req, res) => {
362
- const reqTimer = timer('request');
363
- reqTimer.start();
364
- let response = {};
365
- let script = req.body || "";
366
- const params = { replicate: 1, is_replica: "false", runId: "", seed: "", ...req.query };
367
- const replicate = Number(params.replicate);
368
- // @ts-ignore
369
- if (params?.is_replica === "true") params.is_replica = true;
370
- // @ts-ignore
371
- else params.is_replica = false;
372
- const isReplica = params.is_replica;
373
- isBATCH_MODE = true;
374
- if (!params.runId) params.runId = uid(42);
375
- try {
376
- if (!script) throw new Error("no script");
377
-
378
- // Replace require("../ with require("./
379
- // script = script.replace(/require\("\.\.\//g, 'require("./');
380
- // ^ need to replace this because of the way the script is passed in... this is sketch
381
-
382
- /** @type {Config} */
383
- const config = eval(script);
384
- if (isReplica) {
385
- const newSeed = (Math.random() / Math.random() / Math.random() / Math.random() / Math.random() / Math.random()).toString();
386
- config.seed = newSeed;
387
- params.seed = newSeed;
115
+ // Step 8: Generate lookup tables (if configured)
116
+ if (validatedConfig.lookupTables && validatedConfig.lookupTables.length > 0) {
117
+ await generateLookupTables(context);
388
118
  }
389
119
 
390
- /** @type {Config} */
391
- const optionsYouCantChange = {
392
- verbose: false
393
- };
394
-
395
- if (replicate <= 1 || isReplica) {
396
- if (isReplica) sLog("DM4: worker start", params);
397
- // @ts-ignore
398
- const { files = [], operations = 0, eventCount = 0, userCount = 0 } = await main({
399
- ...config,
400
- ...optionsYouCantChange,
401
- });
402
- reqTimer.stop(false);
403
- response = { files, operations, eventCount, userCount };
120
+ // Step 9: Generate mirror datasets (if configured)
121
+ if (validatedConfig.mirrorProps && Object.keys(validatedConfig.mirrorProps).length > 0) {
122
+ await makeMirror(context);
404
123
  }
405
124
 
406
- else {
407
- sLog(`DM4: job start (${replicate} workers)`, params);
408
- const results = await spawn_file_workers(replicate, script, params);
409
- response = results;
125
+ // Step 10: Generate charts (if enabled)
126
+ if (validatedConfig.makeChart) {
127
+ await generateCharts(context);
410
128
  }
411
- }
412
- catch (e) {
413
- sLog("DM4: error", { error: e.message, stack: e.stack }, "ERROR");
414
- response = { error: e.message };
415
- res.status(500);
416
- }
417
129
 
418
- finally {
419
- reqTimer.stop(false);
420
- const { start, end, delta, human } = reqTimer.report(false);
421
- if (!isReplica) {
422
- sLog(`DM4: job end (${human})`, { human, delta, ...params, ...response });
423
- }
424
- if (isReplica) {
425
- const eps = Math.floor(((response?.eventCount || 0) / delta) * 1000);
426
- sLog(`DM4: worker end (${human})`, { human, delta, eps, ...params, ...response });
130
+ // Step 11: Flush storage containers to disk (if writeToDisk enabled)
131
+ if (validatedConfig.writeToDisk) {
132
+ await flushStorageToDisk(storage, validatedConfig);
427
133
  }
428
- response = { ...response, start, end, delta, human, ...params };
429
- res.send(response);
430
- return;
431
- }
432
- });
433
134
 
135
+ // Step 12: Send to Mixpanel (if token provided)
136
+ let importResults;
137
+ if (validatedConfig.token) {
138
+ importResults = await sendToMixpanel(context);
139
+ }
434
140
 
435
- /**
436
- * @typedef {import('mixpanel-import').ImportResults} ImportResults
437
- */
438
- async function spawn_file_workers(numberWorkers, payload, params) {
439
- const auth = new GoogleAuth();
440
- let client;
441
- if (RUNTIME_URL.includes('localhost')) {
442
- client = await auth.getClient();
443
- }
444
- else {
445
- client = await auth.getIdTokenClient(RUNTIME_URL);
446
- }
447
- const limit = pLimit(CONCURRENCY);
448
- const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
449
-
450
- const requestPromises = Array.from({ length: numberWorkers }, async (_, index) => {
451
- index = index + 1;
452
- await delay(index * 108);
453
- return limit(() => build_request(client, payload, index, params, numberWorkers));
454
- });
455
- const complete = await Promise.allSettled(requestPromises);
456
- const results = {
457
- jobs_success: complete.filter((p) => p.status === "fulfilled").length,
458
- jobs_fail: complete.filter((p) => p.status === "rejected").length
459
- };
141
+ // Step 13: Compile results
142
+ jobTimer.stop(false);
143
+ const { start, end, delta, human } = jobTimer.report(false);
460
144
 
461
- return results;
462
- }
145
+ const extractedData = extractStorageData(storage);
463
146
 
147
+ return {
148
+ ...extractedData,
149
+ importResults,
150
+ files: extractFileInfo(storage),
151
+ time: { start, end, delta, human },
152
+ operations: context.getOperations(),
153
+ eventCount: context.getEventCount(),
154
+ userCount: context.getUserCount()
155
+ };
464
156
 
465
- async function build_request(client, payload, index, params, total) {
466
- let retryAttempt = 0;
467
- sLog(`DM4: summoning worker #${index} of ${total}`, params);
468
- try {
469
- const req = await client.request({
470
- url: RUNTIME_URL + `?replicate=1&is_replica=true&runId=${params.runId || "no run id"}`,
471
- method: "POST",
472
- data: payload,
473
- headers: {
474
- "Content-Type": "text/plain",
475
- },
476
- timeout: 3600 * 1000 * 10,
477
- retryConfig: {
478
- retry: 3,
479
- onRetryAttempt: (error) => {
480
- const statusCode = error?.response?.status?.toString() || "";
481
- retryAttempt++;
482
- sLog(`DM4: summon worker ${index} retry #${retryAttempt}`, { statusCode, message: error.message, stack: error.stack, ...params }, "DEBUG");
483
- },
484
- retryDelay: 1000,
485
- shouldRetry: (error) => {
486
- if (error.code === 'ECONNRESET') return true;
487
- const statusCode = error?.response?.status;
488
- if (statusCode >= 500) return true;
489
- if (statusCode === 429) return true;
490
- }
491
- },
492
- });
493
- sLog(`DM4: worker #${index} responded`, params);
494
- const { data } = req;
495
- return data;
496
157
  } catch (error) {
497
- sLog(`DM4: worker #${index} failed to respond`, { message: error.message, stack: error.stack, code: error.code, retries: retryAttempt, ...params }, "ERROR");
498
- return {};
158
+ if (isCLI || validatedConfig.verbose) {
159
+ console.error(`\n❌ Error: ${error.message}\n`);
160
+ if (validatedConfig.verbose) {
161
+ console.error(error.stack);
162
+ }
163
+ } else {
164
+ sLog("Main execution error", { error: error.message, stack: error.stack }, "ERROR");
165
+ }
166
+ throw error;
499
167
  }
500
168
  }
501
169
 
502
-
503
-
504
- /*
505
- ------
506
- MODELS
507
- ------
508
- */
509
-
510
170
  /**
511
- * creates a mixpanel event with a flat shape
512
- * @param {string} distinct_id
513
- * @param {number} earliestTime
514
- * @param {EventConfig} chosenEvent
515
- * @param {string[]} [anonymousIds]
516
- * @param {string[]} [sessionIds]
517
- * @param {Object} [superProps]
518
- * @param {Object} [groupKeys]
519
- * @param {Boolean} [isFirstEvent]
520
- * @return {Promise<EventSchema>}
171
+ * Generate ad spend data for configured date range
172
+ * @param {Context} context - Context object
521
173
  */
522
- async function makeEvent(distinct_id, earliestTime, chosenEvent, anonymousIds, sessionIds, superProps, groupKeys, isFirstEvent, skipDefaults = false) {
523
- operations++;
524
- eventCount++;
525
- if (!distinct_id) throw new Error("no distinct_id");
526
- if (!anonymousIds) anonymousIds = [];
527
- if (!sessionIds) sessionIds = [];
528
- if (!earliestTime) throw new Error("no earliestTime");
529
- if (!chosenEvent) throw new Error("no chosenEvent");
530
- if (!superProps) superProps = {};
531
- if (!groupKeys) groupKeys = [];
532
- if (!isFirstEvent) isFirstEvent = false;
533
- const chance = u.getChance();
534
- const { mean = 0, deviation = 2, peaks = 5 } = CONFIG?.soup || {};
535
- const {
536
- hasAndroidDevices = false,
537
- hasBrowser = false,
538
- hasCampaigns = false,
539
- hasDesktopDevices = false,
540
- hasIOSDevices = false,
541
- hasLocation = false
542
- } = CONFIG || {};
543
-
544
- //event model
545
- const eventTemplate = {
546
- event: chosenEvent.event,
547
- source: "dm4",
548
- time: "",
549
- insert_id: "",
550
- };
551
-
552
- let defaultProps = {};
553
- let devicePool = [];
554
-
555
- if (hasLocation) defaultProps.location = u.shuffleArray(DEFAULTS.locationsEvents()).pop();
556
- if (hasBrowser) defaultProps.browser = u.choose(DEFAULTS.browsers());
557
- if (hasAndroidDevices) devicePool.push(DEFAULTS.androidDevices());
558
- if (hasIOSDevices) devicePool.push(DEFAULTS.iOSDevices());
559
- if (hasDesktopDevices) devicePool.push(DEFAULTS.desktopDevices());
560
-
561
- // we don't always have campaigns, because of attribution
562
- if (hasCampaigns && chance.bool({ likelihood: 25 })) defaultProps.campaigns = u.shuffleArray(DEFAULTS.campaigns()).pop();
563
- const devices = devicePool.flat();
564
- if (devices.length) defaultProps.device = u.shuffleArray(devices).pop();
565
-
566
-
567
-
568
-
174
+ async function generateAdSpendData(context) {
175
+ const { config, storage } = context;
176
+ const { numDays } = config;
569
177
 
178
+ for (let day = 0; day < numDays; day++) {
179
+ const targetDay = dayjs.unix(global.FIXED_BEGIN).add(day, 'day').toISOString();
180
+ const adSpendEvents = await makeAdSpend(context, targetDay);
570
181
 
571
- //event time
572
- if (earliestTime) {
573
- if (isFirstEvent) eventTemplate.time = dayjs.unix(earliestTime).toISOString();
574
- if (!isFirstEvent) eventTemplate.time = u.TimeSoup(earliestTime, FIXED_NOW, peaks, deviation, mean);
575
- }
576
-
577
- // anonymous and session ids
578
- if (anonymousIds.length) eventTemplate.device_id = chance.pickone(anonymousIds);
579
- if (sessionIds.length) eventTemplate.session_id = chance.pickone(sessionIds);
580
-
581
- //sometimes have a user_id
582
- if (!isFirstEvent && chance.bool({ likelihood: 42 })) eventTemplate.user_id = distinct_id;
583
-
584
- // ensure that there is a user_id or device_id
585
- if (!eventTemplate.user_id && !eventTemplate.device_id) eventTemplate.user_id = distinct_id;
586
-
587
- const props = { ...chosenEvent.properties, ...superProps };
588
-
589
- //iterate through custom properties
590
- for (const key in props) {
591
- try {
592
- eventTemplate[key] = u.choose(props[key]);
593
- } catch (e) {
594
- console.error(`error with ${key} in ${chosenEvent.event} event`, e);
595
- debugger;
596
- }
597
- }
598
-
599
- //iterate through default properties
600
- if (!skipDefaults) {
601
- for (const key in defaultProps) {
602
- if (Array.isArray(defaultProps[key])) {
603
- const choice = u.choose(defaultProps[key]);
604
- if (typeof choice === "string") {
605
- if (!eventTemplate[key]) eventTemplate[key] = choice;
606
- }
607
-
608
- else if (Array.isArray(choice)) {
609
- for (const subChoice of choice) {
610
- if (!eventTemplate[key]) eventTemplate[key] = subChoice;
611
- }
612
- }
613
-
614
- else if (typeof choice === "object") {
615
- for (const subKey in choice) {
616
- if (typeof choice[subKey] === "string") {
617
- if (!eventTemplate[subKey]) eventTemplate[subKey] = choice[subKey];
618
- }
619
- else if (Array.isArray(choice[subKey])) {
620
- const subChoice = u.choose(choice[subKey]);
621
- if (!eventTemplate[subKey]) eventTemplate[subKey] = subChoice;
622
- }
623
-
624
- else if (typeof choice[subKey] === "object") {
625
- for (const subSubKey in choice[subKey]) {
626
- if (!eventTemplate[subSubKey]) eventTemplate[subSubKey] = choice[subKey][subSubKey];
627
- }
628
- }
629
-
630
- }
631
- }
632
- }
633
- else if (typeof (defaultProps[key]) === "object") {
634
- const obj = defaultProps[key];
635
- for (const subKey in obj) {
636
- if (Array.isArray(obj[subKey])) {
637
- const subChoice = u.choose(obj[subKey]);
638
- if (Array.isArray(subChoice)) {
639
- for (const subSubChoice of subChoice) {
640
- if (!eventTemplate[subKey]) eventTemplate[subKey] = subSubChoice;
641
- }
642
- }
643
- else if (typeof subChoice === "object") {
644
- for (const subSubKey in subChoice) {
645
- if (!eventTemplate[subSubKey]) eventTemplate[subSubKey] = subChoice[subSubKey];
646
- }
647
- }
648
- else {
649
- if (!eventTemplate[subKey]) eventTemplate[subKey] = subChoice;
650
- }
651
- }
652
- else {
653
- if (!eventTemplate[subKey]) eventTemplate[subKey] = obj[subKey];
654
- }
655
- }
656
- }
657
- else {
658
- if (!eventTemplate[key]) eventTemplate[key] = defaultProps[key];
182
+ if (adSpendEvents.length > 0) {
183
+ for (const adSpendEvent of adSpendEvents) {
184
+ await storage.adSpendData.hookPush(adSpendEvent);
659
185
  }
660
186
  }
661
187
  }
662
-
663
- //iterate through groups
664
- for (const groupPair of groupKeys) {
665
- const groupKey = groupPair[0];
666
- const groupCardinality = groupPair[1];
667
- const groupEvents = groupPair[2] || [];
668
-
669
- // empty array for group events means all events
670
- if (!groupEvents.length) eventTemplate[groupKey] = u.pick(u.weighNumRange(1, groupCardinality));
671
- if (groupEvents.includes(eventTemplate.event)) eventTemplate[groupKey] = u.pick(u.weighNumRange(1, groupCardinality));
672
- }
673
-
674
- //make $insert_id
675
- eventTemplate.insert_id = md5(JSON.stringify(eventTemplate));
676
-
677
- // move time forward
678
- if (earliestTime) {
679
- const timeShifted = dayjs(eventTemplate.time).add(timeShift, "seconds").toISOString();
680
- eventTemplate.time = timeShifted;
681
- }
682
-
683
-
684
- return eventTemplate;
685
188
  }
686
189
 
687
190
  /**
688
- * takes a description of a funnel an generates successful and unsuccessful conversions
689
- * this is called MANY times per user
690
- * @param {Funnel} funnel
691
- * @param {Person} user
692
- * @param {number} firstEventTime
693
- * @param {UserProfile | Object} [profile]
694
- * @param {Record<string, SCDSchema[]> | Object} [scd]
695
- * @param {Config} [config]
696
- * @return {Promise<[EventSchema[], Boolean]>}
191
+ * Generate group profiles for all configured group keys
192
+ * @param {Context} context - Context object
697
193
  */
698
- async function makeFunnel(funnel, user, firstEventTime, profile, scd, config) {
699
- if (!funnel) throw new Error("no funnel");
700
- if (!user) throw new Error("no user");
701
- if (!profile) profile = {};
702
- if (!scd) scd = {};
703
- const sessionStartEvents = config?.events?.filter(a => a.isSessionStartEvent) || [];
704
-
194
+ async function generateGroupProfiles(context) {
195
+ const { config, storage } = context;
196
+ const { groupKeys, groupProps = {} } = config;
705
197
 
706
- const chance = u.getChance();
707
- const { hook = async (a) => a } = config;
708
- await hook(funnel, "funnel-pre", { user, profile, scd, funnel, config });
709
- let {
710
- sequence,
711
- conversionRate = 50,
712
- order = 'sequential',
713
- timeToConvert = 1,
714
- props,
715
- requireRepeats = false,
716
- } = funnel;
717
- const { distinct_id, created, anonymousIds, sessionIds } = user;
718
- const { superProps, groupKeys } = config;
719
-
720
-
721
- //choose the properties for this funnel
722
- const chosenFunnelProps = { ...props, ...superProps };
723
- for (const key in props) {
724
- try {
725
- chosenFunnelProps[key] = u.choose(chosenFunnelProps[key]);
726
- } catch (e) {
727
- console.error(`error with ${key} in ${funnel.sequence.join(" > ")} funnel`, e);
728
- debugger;
729
- }
198
+ if (isCLI || config.verbose) {
199
+ console.log('\n👥 Generating group profiles...');
730
200
  }
731
201
 
732
- const funnelPossibleEvents = sequence
733
- .map((eventName) => {
734
- const foundEvent = config?.events?.find((e) => e.event === eventName);
735
- /** @type {EventConfig} */
736
- const eventSpec = clone(foundEvent) || { event: eventName, properties: {} };
737
- for (const key in eventSpec.properties) {
738
- try {
739
- eventSpec.properties[key] = u.choose(eventSpec.properties[key]);
740
- } catch (e) {
741
- console.error(`error with ${key} in ${eventSpec.event} event`, e);
742
- debugger;
743
- }
744
- }
745
- delete eventSpec.isFirstEvent;
746
- delete eventSpec.weight;
747
- eventSpec.properties = { ...eventSpec.properties, ...chosenFunnelProps };
748
- return eventSpec;
749
- })
750
- .reduce((acc, step) => {
751
- if (!requireRepeats) {
752
- if (acc.find(e => e.event === step.event)) {
753
- if (chance.bool({ likelihood: 50 })) {
754
- conversionRate = Math.floor(conversionRate * 1.35); //increase conversion rate
755
- acc.push(step);
756
- }
757
- //A SKIPPED STEP!
758
- else {
759
- conversionRate = Math.floor(conversionRate * .70); //reduce conversion rate
760
- return acc; //early return to skip the step
761
- }
762
- }
763
- else {
764
- acc.push(step);
765
- }
766
- }
767
- else {
768
- acc.push(step);
769
- }
770
- return acc;
771
- }, []);
772
-
773
- if (conversionRate > 100) conversionRate = 100;
774
- if (conversionRate < 0) conversionRate = 0;
775
- let doesUserConvert = chance.bool({ likelihood: conversionRate });
776
- let numStepsUserWillTake = sequence.length;
777
- if (!doesUserConvert) numStepsUserWillTake = u.integer(1, sequence.length - 1);
778
- const funnelTotalRelativeTimeInHours = timeToConvert / numStepsUserWillTake;
779
- const msInHour = 60000 * 60;
780
- const funnelStepsUserWillTake = funnelPossibleEvents.slice(0, numStepsUserWillTake);
781
-
782
- let funnelActualOrder = [];
783
-
784
- switch (order) {
785
- case "sequential":
786
- funnelActualOrder = funnelStepsUserWillTake;
787
- break;
788
- case "random":
789
- funnelActualOrder = u.shuffleArray(funnelStepsUserWillTake);
790
- break;
791
- case "first-fixed":
792
- funnelActualOrder = u.shuffleExceptFirst(funnelStepsUserWillTake);
793
- break;
794
- case "last-fixed":
795
- funnelActualOrder = u.shuffleExceptLast(funnelStepsUserWillTake);
796
- break;
797
- case "first-and-last-fixed":
798
- funnelActualOrder = u.fixFirstAndLast(funnelStepsUserWillTake);
799
- break;
800
- case "middle-fixed":
801
- funnelActualOrder = u.shuffleOutside(funnelStepsUserWillTake);
802
- break;
803
- case "interrupted":
804
- const potentialSubstitutes = config?.events
805
- ?.filter(e => !e.isFirstEvent)
806
- ?.filter(e => !sequence.includes(e.event)) || [];
807
- funnelActualOrder = u.interruptArray(funnelStepsUserWillTake, potentialSubstitutes);
808
- break;
809
- default:
810
- funnelActualOrder = funnelStepsUserWillTake;
811
- break;
812
- }
813
-
814
-
815
-
816
- let lastTimeJump = 0;
817
- const funnelActualEventsWithOffset = funnelActualOrder
818
- .map((event, index) => {
819
- if (index === 0) {
820
- event.relativeTimeMs = 0;
821
- return event;
822
- }
823
-
824
- // Calculate base increment for each step
825
- const baseIncrement = (timeToConvert * msInHour) / numStepsUserWillTake;
826
-
827
- // Introduce a random fluctuation factor
828
- const fluctuation = u.integer(-baseIncrement / u.integer(3, 5), baseIncrement / u.integer(3, 5));
829
-
830
- // Ensure the time increments are increasing and add randomness
831
- const previousTime = lastTimeJump;
832
- const currentTime = previousTime + baseIncrement + fluctuation;
202
+ for (let i = 0; i < groupKeys.length; i++) {
203
+ const [groupKey, groupCount] = groupKeys[i];
204
+ const groupContainer = storage.groupProfilesData[i];
833
205
 
834
- // Assign the calculated time to the event
835
- const chosenTime = Math.max(currentTime, previousTime + 1); // Ensure non-decreasing time
836
- lastTimeJump = chosenTime;
837
- event.relativeTimeMs = chosenTime;
838
- return event;
839
- });
206
+ if (!groupContainer) {
207
+ console.warn(`Warning: No storage container found for group key: ${groupKey}`);
208
+ continue;
209
+ }
840
210
 
211
+ if (isCLI || config.verbose) {
212
+ console.log(` Creating ${groupCount.toLocaleString()} ${groupKey} profiles...`);
213
+ }
841
214
 
842
- const earliestTime = firstEventTime || dayjs(created).unix();
843
- let funnelStartTime;
215
+ // Get group-specific props if available
216
+ const specificGroupProps = groupProps[groupKey] || {};
844
217
 
218
+ for (let j = 0; j < groupCount; j++) {
219
+ const groupProfile = await makeGroupProfile(context, groupKey, specificGroupProps, {
220
+ [groupKey]: `${groupKey}_${j + 1}`
221
+ });
845
222
 
846
- if (sessionStartEvents.length) {
847
- const sessionStartEvent = chance.pickone(sessionStartEvents);
848
- sessionStartEvent.relativeTimeMs = -15000;
849
- funnelActualEventsWithOffset.push(sessionStartEvent);
223
+ await groupContainer.hookPush(groupProfile);
224
+ }
850
225
  }
851
226
 
852
-
853
- let finalEvents = await Promise.all(funnelActualEventsWithOffset
854
- .map(async (event, index) => {
855
- const newEvent = await makeEvent(distinct_id, earliestTime, event, anonymousIds, sessionIds, {}, groupKeys);
856
- if (index === 0) {
857
- funnelStartTime = dayjs(newEvent.time);
858
- delete newEvent.relativeTimeMs;
859
- return Promise.resolve(newEvent);
860
- }
861
- try {
862
- newEvent.time = dayjs(funnelStartTime).add(event.relativeTimeMs, "milliseconds").toISOString();
863
- delete newEvent.relativeTimeMs;
864
- return Promise.resolve(newEvent);
865
- }
866
- catch (e) {
867
- //shouldn't happen
868
- debugger;
869
- }
870
- }));
871
-
872
- await hook(finalEvents, "funnel-post", { user, profile, scd, funnel, config });
873
- return [finalEvents, doesUserConvert];
227
+ if (isCLI || config.verbose) {
228
+ console.log('✅ Group profiles generated successfully');
229
+ }
874
230
  }
875
231
 
876
232
  /**
877
- * a function that creates a profile (user or group)
878
- * @overload
879
- * @param {{[key: string]: ValueValid}} props
880
- * @param {{[key: string]: ValueValid}} [defaults]
881
- * @returns {Promise<UserProfile>}
882
- *
883
- * @overload
884
- * @param {{[key: string]: ValueValid}} props
885
- * @param {{[key: string]: ValueValid}} [defaults]
886
- * @returns {Promise<GroupProfile>}
233
+ * Generate lookup tables for all configured lookup schemas
234
+ * @param {Context} context - Context object
887
235
  */
888
- async function makeProfile(props, defaults) {
889
- operations++;
890
- const keysToNotChoose = ["anonymousIds", "sessionIds"];
891
-
892
- const profile = {
893
- ...defaults,
894
- };
895
-
896
- for (const key in profile) {
897
- if (keysToNotChoose.includes(key)) continue;
898
- try {
899
- profile[key] = u.choose(profile[key]);
900
- }
901
- catch (e) {
902
- // never gets here
903
- debugger;
904
- }
905
- }
906
-
236
+ async function generateLookupTables(context) {
237
+ const { config, storage } = context;
238
+ const { lookupTables } = config;
907
239
 
908
- for (const key in props) {
909
- try {
910
- profile[key] = u.choose(props[key]);
911
- } catch (e) {
912
- // never gets here
913
- debugger;
914
- }
240
+ if (isCLI || config.verbose) {
241
+ console.log('\n🔍 Generating lookup tables...');
915
242
  }
916
243
 
917
- return profile;
918
- }
919
-
920
- /**
921
- * @param {SCDProp} scdProp
922
- * @param {string} scdKey
923
- * @param {string} distinct_id
924
- * @param {number} mutations
925
- * @param {string} created
926
- * @return {Promise<SCDSchema[]>}
927
- */
928
- async function makeSCD(scdProp, scdKey, distinct_id, mutations, created) {
929
- if (Array.isArray(scdProp)) scdProp = { values: scdProp, frequency: 'week', max: 10, timing: 'fuzzy', type: 'user' };
930
- const { frequency, max, timing, values, type = "user" } = scdProp;
931
- if (JSON.stringify(values) === "{}" || JSON.stringify(values) === "[]") return [];
932
- const scdEntries = [];
933
- let lastInserted = dayjs(created);
934
- const deltaDays = dayjs().diff(lastInserted, "day");
935
- const uuidKeyName = type === 'user' ? 'distinct_id' : type;
936
-
937
- for (let i = 0; i < mutations; i++) {
938
- if (lastInserted.isAfter(dayjs())) break;
939
- let scd = await makeProfile({ [scdKey]: values }, { [uuidKeyName]: distinct_id });
940
-
941
- // Explicitly constructing SCDSchema object with all required properties
942
- const scdEntry = {
943
- ...scd, // spread existing properties
944
- [uuidKeyName]: scd.distinct_id || distinct_id, // ensure distinct_id is set
945
- startTime: null,
946
- insertTime: null
947
- };
244
+ for (let i = 0; i < lookupTables.length; i++) {
245
+ const lookupConfig = lookupTables[i];
246
+ const { key, entries, attributes } = lookupConfig;
247
+ const lookupContainer = storage.lookupTableData[i];
948
248
 
949
- if (timing === 'fixed') {
950
- if (frequency === "day") scdEntry.startTime = lastInserted.add(1, "day").startOf('day').toISOString();
951
- if (frequency === "week") scdEntry.startTime = lastInserted.add(1, "week").startOf('week').toISOString();
952
- if (frequency === "month") scdEntry.startTime = lastInserted.add(1, "month").startOf('month').toISOString();
249
+ if (!lookupContainer) {
250
+ console.warn(`Warning: No storage container found for lookup table: ${key}`);
251
+ continue;
953
252
  }
954
253
 
955
- if (timing === 'fuzzy') {
956
- scdEntry.startTime = lastInserted.toISOString();
254
+ if (isCLI || config.verbose) {
255
+ console.log(` Creating ${entries.toLocaleString()} ${key} lookup entries...`);
957
256
  }
958
257
 
959
- const insertTime = lastInserted.add(u.integer(1, 9000), "seconds");
960
- scdEntry.insertTime = insertTime.toISOString();
961
-
962
-
258
+ for (let j = 0; j < entries; j++) {
259
+ const lookupEntry = await makeProfile(context, attributes, {
260
+ [key]: `${key}_${j + 1}`
261
+ });
963
262
 
964
- // Ensure TypeScript sees all required properties are set
965
- if (scdEntry.hasOwnProperty('insertTime') && scdEntry.hasOwnProperty('startTime')) {
966
- scdEntries.push(scdEntry);
263
+ await lookupContainer.hookPush(lookupEntry);
967
264
  }
968
-
969
- //advance time for next entry
970
- lastInserted = lastInserted
971
- .add(u.integer(0, deltaDays), "day")
972
- .subtract(u.integer(1, 9000), "seconds");
973
265
  }
974
266
 
975
- //de-dupe on startTime
976
- const deduped = scdEntries.filter((entry, index, self) =>
977
- index === self.findIndex((t) => (
978
- t.startTime === entry.startTime
979
- ))
980
- );
981
- return deduped;
982
- }
983
-
984
-
985
- /**
986
- * creates ad spend events for a given day for all campaigns in default campaigns
987
- * @param {string} day
988
- * @return {Promise<EventSchema[]>}
989
- */
990
- async function makeAdSpend(day, campaigns = CAMPAIGNS) {
991
- operations++;
992
- const chance = u.getChance();
993
- const adSpendEvents = [];
994
- for (const network of campaigns) {
995
- const campaigns = network.utm_campaign;
996
- loopCampaigns: for (const campaign of campaigns) {
997
- if (campaign === "$organic") continue loopCampaigns;
998
-
999
- const CAC = u.integer(42, 420); //todo: get the # of users created in this day from eventData
1000
- // Randomly generating cost
1001
- const cost = chance.floating({ min: 10, max: 250, fixed: 2 });
1002
-
1003
- // Ensuring realistic CPC and CTR
1004
- const avgCPC = chance.floating({ min: 0.33, max: 2.00, fixed: 4 });
1005
- const avgCTR = chance.floating({ min: 0.05, max: 0.25, fixed: 4 });
1006
-
1007
- // Deriving impressions from cost and avg CPC
1008
- const clicks = Math.floor(cost / avgCPC);
1009
- const impressions = Math.floor(clicks / avgCTR);
1010
- const views = Math.floor(impressions * avgCTR);
1011
-
1012
- //tags
1013
- const utm_medium = u.choose(u.pickAWinner(network.utm_medium)());
1014
- const utm_content = u.choose(u.pickAWinner(network.utm_content)());
1015
- const utm_term = u.choose(u.pickAWinner(network.utm_term)());
1016
- //each of these is a campaign
1017
- const id = network.utm_source[0] + '-' + campaign;
1018
- const uid = md5(id);
1019
- const adSpendEvent = {
1020
- event: "$ad_spend",
1021
- time: day,
1022
- source: 'dm4',
1023
- utm_campaign: campaign,
1024
- campaign_id: id,
1025
- insert_id: uid,
1026
- network: network.utm_source[0].toUpperCase(),
1027
- distinct_id: network.utm_source[0].toUpperCase(),
1028
- utm_source: network.utm_source[0],
1029
- utm_medium,
1030
- utm_content,
1031
- utm_term,
1032
-
1033
-
1034
- clicks,
1035
- views,
1036
- impressions,
1037
- cost,
1038
- date: dayjs(day).format("YYYY-MM-DD"),
1039
- };
1040
- adSpendEvents.push(adSpendEvent);
1041
- }
1042
-
1043
-
267
+ if (isCLI || config.verbose) {
268
+ console.log('✅ Lookup tables generated successfully');
1044
269
  }
1045
- return adSpendEvents;
1046
270
  }
1047
271
 
1048
272
  /**
1049
- * takes event data and creates mirror datasets in a future state
1050
- * depending on the mirror strategy
1051
- * @param {Config} config
1052
- * @param {Storage} storage
1053
- * @return {Promise<void>}
273
+ * Generate SCDs for group entities
274
+ * @param {Context} context - Context object
1054
275
  */
1055
- async function makeMirror(config, storage) {
1056
- const { mirrorProps } = config;
1057
- const { eventData, mirrorEventData } = storage;
1058
- const now = dayjs();
1059
-
1060
- for (const oldEvent of eventData) {
1061
- let newEvent;
1062
- const eventTime = dayjs(oldEvent.time);
1063
- const delta = now.diff(eventTime, "day");
1064
-
1065
- for (const mirrorProp in mirrorProps) {
1066
- const prop = mirrorProps[mirrorProp];
1067
- const { daysUnfilled = 7, events = "*", strategy = "create", values = [] } = prop;
1068
- if (events === "*" || events.includes(oldEvent.event)) {
1069
- if (!newEvent) newEvent = clone(oldEvent);
1070
-
1071
- switch (strategy) {
1072
- case "create":
1073
- newEvent[mirrorProp] = u.choose(values);
1074
- break;
1075
- case "delete":
1076
- delete newEvent[mirrorProp];
1077
- break;
1078
- case "fill":
1079
- if (delta >= daysUnfilled) oldEvent[mirrorProp] = u.choose(values);
1080
- newEvent[mirrorProp] = u.choose(values);
1081
- break;
1082
- case "update":
1083
- if (!oldEvent[mirrorProp]) {
1084
- newEvent[mirrorProp] = u.choose(values);
1085
- }
1086
- else {
1087
- newEvent[mirrorProp] = oldEvent[mirrorProp];
1088
- }
1089
- break;
1090
- default:
1091
- throw new Error(`strategy ${strategy} is unknown`);
1092
- }
1093
-
1094
-
1095
- }
1096
- }
1097
-
1098
- const mirrorDataPoint = newEvent ? newEvent : oldEvent;
1099
- await mirrorEventData.hookPush(mirrorDataPoint);
276
+ async function generateGroupSCDs(context) {
277
+ const { config, storage } = context;
278
+ const { scdProps, groupKeys } = config;
1100
279
 
280
+ if (isCLI || config.verbose) {
281
+ console.log('\n📊 Generating group SCDs...');
1101
282
  }
1102
- }
1103
-
1104
-
1105
- /*
1106
- --------------
1107
- ORCHESTRATORS
1108
- --------------
1109
- */
1110
-
1111
283
 
1112
- /**
1113
- * a loop that creates users and their events; the loop is inside this function
1114
- * @param {Config} config
1115
- * @param {Storage} storage
1116
- * @param {number} [concurrency]
1117
- * @return {Promise<void>}
1118
- */
1119
- async function userLoop(config, storage, concurrency = 1) {
284
+ // Import utilities and generators
285
+ const { objFilter } = await import('ak-tools');
286
+ const { makeSCD } = await import('./lib/generators/scd.js');
287
+ const u = await import('./lib/utils/utils.js');
1120
288
  const chance = u.getChance();
1121
- const USER_CONN = pLimit(concurrency);
1122
- const {
1123
- verbose,
1124
- numUsers,
1125
- numEvents,
1126
- isAnonymous,
1127
- hasAvatar,
1128
- hasAnonIds,
1129
- hasSessionIds,
1130
- hasLocation,
1131
- funnels,
1132
- userProps,
1133
- scdProps,
1134
- numDays,
1135
- percentUsersBornInDataset = 5,
1136
- } = config;
1137
- const { eventData, userProfilesData, scdTableData } = storage;
1138
- const avgEvPerUser = numEvents / numUsers;
1139
- const startTime = Date.now();
1140
-
1141
- for (let i = 0; i < numUsers; i++) {
1142
- await USER_CONN(async () => {
1143
- userCount++;
1144
- const eps = Math.floor(eventCount / ((Date.now() - startTime) / 1000));
1145
- if (verbose) u.progress([["users", userCount], ["events", eventCount], ["eps", eps]]);
1146
- const userId = chance.guid();
1147
- const user = u.generateUser(userId, { numDays, isAnonymous, hasAvatar, hasAnonIds, hasSessionIds });
1148
- const { distinct_id, created } = user;
1149
- const userIsBornInDataset = chance.bool({ likelihood: percentUsersBornInDataset });
1150
- let numEventsPreformed = 0;
1151
- if (!userIsBornInDataset) delete user.created;
1152
- const adjustedCreated = userIsBornInDataset ? dayjs(created).subtract(daysShift, 'd') : dayjs.unix(global.FIXED_BEGIN);
1153
-
1154
- if (hasLocation) {
1155
- const location = u.shuffleArray(u.choose(DEFAULTS.locationsUsers)).pop();
1156
- for (const key in location) {
1157
- user[key] = location[key];
1158
- }
1159
- }
1160
289
 
1161
- // Profile creation
1162
- const profile = await makeProfile(userProps, user);
290
+ // Get only group SCDs (not user SCDs)
291
+ // @ts-ignore
292
+ const groupSCDProps = objFilter(scdProps, (scd) => scd.type && scd.type !== 'user');
1163
293
 
294
+ for (const [groupKey, groupCount] of groupKeys) {
295
+ // Filter SCDs that apply to this specific group key
296
+ // @ts-ignore
297
+ const groupSpecificSCDs = objFilter(groupSCDProps, (scd) => scd.type === groupKey);
1164
298
 
1165
- // SCD creation
1166
- const scdUserTables = t.objFilter(scdProps, (scd) => scd.type === 'user' || !scd.type);
1167
- const scdTableKeys = Object.keys(scdUserTables);
299
+ if (Object.keys(groupSpecificSCDs).length === 0) {
300
+ continue; // No SCDs for this group type
301
+ }
1168
302
 
303
+ if (isCLI || config.verbose) {
304
+ console.log(` Generating SCDs for ${groupCount.toLocaleString()} ${groupKey} entities...`);
305
+ }
1169
306
 
1170
- const userSCD = {};
1171
- for (const [index, key] of scdTableKeys.entries()) {
1172
- // @ts-ignore
1173
- const { max = 100 } = scdProps[key];
1174
- const mutations = chance.integer({ min: 1, max });
1175
- const changes = await makeSCD(scdProps[key], key, distinct_id, mutations, created);
1176
- userSCD[key] = changes;
1177
- await config.hook(changes, "scd-pre", { profile, type: 'user', scd: { [key]: scdProps[key] }, config, allSCDs: userSCD });
1178
- }
307
+ // Generate SCDs for each group entity
308
+ for (let i = 0; i < groupCount; i++) {
309
+ const groupId = `${groupKey}_${i + 1}`;
1179
310
 
1180
- let numEventsThisUserWillPreform = Math.floor(chance.normal({
1181
- mean: avgEvPerUser,
1182
- dev: avgEvPerUser / u.integer(u.integer(2, 5), u.integer(2, 7))
1183
- }) * 0.714159265359);
1184
-
1185
- // Power users and Shitty users logic...
1186
- chance.bool({ likelihood: 20 }) ? numEventsThisUserWillPreform *= 5 : null;
1187
- chance.bool({ likelihood: 15 }) ? numEventsThisUserWillPreform *= 0.333 : null;
1188
- numEventsThisUserWillPreform = Math.round(numEventsThisUserWillPreform);
1189
-
1190
- let userFirstEventTime;
1191
-
1192
- const firstFunnels = funnels.filter((f) => f.isFirstFunnel).reduce(u.weighFunnels, []);
1193
- const usageFunnels = funnels.filter((f) => !f.isFirstFunnel).reduce(u.weighFunnels, []);
1194
-
1195
- const secondsInDay = 86400;
1196
- const noise = () => chance.integer({ min: 0, max: secondsInDay });
1197
- let usersEvents = [];
1198
-
1199
- if (firstFunnels.length && userIsBornInDataset) {
1200
- const firstFunnel = chance.pickone(firstFunnels, user);
1201
-
1202
- const firstTime = adjustedCreated.subtract(noise(), 'seconds').unix();
1203
- const [data, userConverted] = await makeFunnel(firstFunnel, user, firstTime, profile, userSCD, config);
1204
- userFirstEventTime = dayjs(data[0].time).subtract(timeShift, 'seconds').unix();
1205
- numEventsPreformed += data.length;
1206
- // await eventData.hookPush(data, { profile });
1207
- usersEvents.push(...data);
1208
- if (!userConverted) {
1209
- if (verbose) u.progress([["users", userCount], ["events", eventCount]]);
1210
- return;
1211
- }
1212
- } else {
1213
- // userFirstEventTime = dayjs(created).unix();
1214
- // userFirstEventTime = global.FIXED_BEGIN;
1215
- userFirstEventTime = adjustedCreated.subtract(noise(), 'seconds').unix();
1216
- }
311
+ // Generate SCDs for this group entity
312
+ for (const [scdKey, scdConfig] of Object.entries(groupSpecificSCDs)) {
313
+ const { max = 10 } = scdConfig;
314
+ const mutations = chance.integer({ min: 1, max });
1217
315
 
1218
- while (numEventsPreformed < numEventsThisUserWillPreform) {
1219
- if (usageFunnels.length) {
1220
- const currentFunnel = chance.pickone(usageFunnels);
1221
- const [data, userConverted] = await makeFunnel(currentFunnel, user, userFirstEventTime, profile, userSCD, config);
1222
- numEventsPreformed += data.length;
1223
- usersEvents.push(...data);
1224
- // await eventData.hookPush(data, { profile });
1225
- } else {
1226
- const data = await makeEvent(distinct_id, userFirstEventTime, u.pick(config.events), user.anonymousIds, user.sessionIds, {}, config.groupKeys, true);
1227
- numEventsPreformed++;
1228
- usersEvents.push(data);
1229
- // await eventData.hookPush(data);
316
+ // Use a base time for the group entity (similar to user creation time)
317
+ const baseTime = context.FIXED_BEGIN || context.FIXED_NOW;
318
+ const changes = await makeSCD(context, scdConfig, scdKey, groupId, mutations, baseTime);
319
+
320
+ // Apply hook if configured
321
+ if (config.hook) {
322
+ await config.hook(changes, "scd-pre", {
323
+ type: 'group',
324
+ groupKey,
325
+ scd: { [scdKey]: scdConfig },
326
+ config
327
+ });
1230
328
  }
1231
- }
1232
-
1233
- // NOW ADD ALL OUR DATA FOR THIS USER
1234
- if (config.hook) {
1235
- const newEvents = await config.hook(usersEvents, "everything", { profile, scd: userSCD, config, userIsBornInDataset });
1236
- if (Array.isArray(newEvents)) usersEvents = newEvents;
1237
- }
1238
329
 
1239
- await userProfilesData.hookPush(profile);
1240
-
1241
- if (Object.keys(userSCD).length) {
1242
- for (const [key, changesArray] of Object.entries(userSCD)) {
1243
- for (const changes of changesArray) {
1244
- const target = scdTableData.filter(arr => arr.scdKey === key).pop();
1245
- await target.hookPush(changes, { profile, type: 'user' });
330
+ // Store SCDs in the appropriate SCD table
331
+ for (const change of changes) {
332
+ try {
333
+ const target = storage.scdTableData.filter(arr => arr.scdKey === scdKey).pop();
334
+ await target.hookPush(change, { type: 'group', groupKey });
335
+ } catch (e) {
336
+ // Fallback for tests
337
+ const target = storage.scdTableData[0];
338
+ await target.hookPush(change, { type: 'group', groupKey });
1246
339
  }
1247
340
  }
1248
341
  }
1249
- await eventData.hookPush(usersEvents, { profile });
1250
-
1251
-
1252
- if (verbose) u.progress([["users", userCount], ["events", eventCount]]);
1253
- });
342
+ }
1254
343
  }
1255
344
 
345
+ if (isCLI || config.verbose) {
346
+ console.log('✅ Group SCDs generated successfully');
347
+ }
1256
348
  }
1257
349
 
1258
-
1259
350
  /**
1260
- * sends the data to mixpanel
1261
- * todo: this needs attention
1262
- * @param {Config} config
1263
- * @param {Storage} storage
351
+ * Generate charts for data visualization
352
+ * @param {Context} context - Context object
1264
353
  */
1265
- async function sendToMixpanel(config, storage) {
1266
- const {
1267
- adSpendData,
1268
- eventData,
1269
- groupProfilesData,
1270
- lookupTableData,
1271
- mirrorEventData,
1272
- scdTableData,
1273
- userProfilesData,
1274
- groupEventData
1275
-
1276
- } = storage;
1277
- const { token, region, writeToDisk = true } = config;
1278
- const importResults = { events: {}, users: {}, groups: [] };
1279
-
1280
- /** @type {import('mixpanel-import').Creds} */
1281
- const creds = { token };
1282
- const { format } = config;
1283
- const mpImportFormat = format === "json" ? "jsonl" : "csv";
1284
- /** @type {import('mixpanel-import').Options} */
1285
- const commonOpts = {
1286
- region,
1287
- fixData: true,
1288
- verbose: false,
1289
- forceStream: true,
1290
- strict: true, //false,
1291
- epochEnd: dayjs().unix(), //is this chill?
1292
- dryRun: false,
1293
- abridged: false,
1294
- fixJson: true,
1295
- showProgress: NODE_ENV === "dev" ? true : false,
1296
- streamFormat: mpImportFormat
1297
- };
1298
-
1299
- if (isCLI) commonOpts.showProgress = true;
354
+ async function generateCharts(context) {
355
+ const { config, storage } = context;
1300
356
 
357
+ if (config.makeChart && storage.eventData?.length > 0) {
358
+ const chartPath = typeof config.makeChart === 'string'
359
+ ? config.makeChart
360
+ : `./charts/${config.simulationName}-timeline.png`;
1301
361
 
362
+ await generateLineChart(storage.eventData, undefined, chartPath);
1302
363
 
1303
- if (eventData || isBATCH_MODE) {
1304
- log(`importing events to mixpanel...\n`);
1305
- let eventDataToImport = clone(eventData);
1306
- if (isBATCH_MODE) {
1307
- const writeDir = eventData.getWriteDir();
1308
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1309
- eventDataToImport = files.filter(f => f.includes('-EVENTS-'));
1310
- }
1311
- const imported = await mp(creds, eventDataToImport, {
1312
- recordType: "event",
1313
- ...commonOpts,
1314
- });
1315
- log(`\tsent ${comma(imported.success)} events\n`);
1316
- importResults.events = imported;
1317
- }
1318
- if (userProfilesData || isBATCH_MODE) {
1319
- log(`importing user profiles to mixpanel...\n`);
1320
- let userProfilesToImport = clone(userProfilesData);
1321
- if (isBATCH_MODE) {
1322
- const writeDir = userProfilesData.getWriteDir();
1323
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1324
- userProfilesToImport = files.filter(f => f.includes('-USERS-'));
1325
- }
1326
- const imported = await mp(creds, userProfilesToImport, {
1327
- recordType: "user",
1328
- ...commonOpts,
1329
- });
1330
- log(`\tsent ${comma(imported.success)} user profiles\n`);
1331
- importResults.users = imported;
1332
- }
1333
- if (groupEventData || isBATCH_MODE) {
1334
- log(`importing ad spend data to mixpanel...\n`);
1335
- let adSpendDataToImport = clone(adSpendData);
1336
- if (isBATCH_MODE) {
1337
- const writeDir = adSpendData.getWriteDir();
1338
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1339
- adSpendDataToImport = files.filter(f => f.includes('-AD-SPEND-'));
1340
- }
1341
- const imported = await mp(creds, adSpendDataToImport, {
1342
- recordType: "event",
1343
- ...commonOpts,
1344
- });
1345
- log(`\tsent ${comma(imported.success)} ad spend events\n`);
1346
- importResults.adSpend = imported;
1347
- }
1348
- if (groupProfilesData || isBATCH_MODE) {
1349
- for (const groupEntity of groupProfilesData) {
1350
- const groupKey = groupEntity?.groupKey;
1351
- log(`importing ${groupKey} profiles to mixpanel...\n`);
1352
- let groupProfilesToImport = clone(groupEntity);
1353
- if (isBATCH_MODE) {
1354
- const writeDir = groupEntity.getWriteDir();
1355
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1356
- groupProfilesToImport = files.filter(f => f.includes(`-GROUPS-${groupKey}`));
1357
- }
1358
- const imported = await mp({ token, groupKey }, groupProfilesToImport, {
1359
- recordType: "group",
1360
- ...commonOpts,
1361
-
1362
- });
1363
- log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);
1364
-
1365
- importResults.groups.push(imported);
1366
- }
1367
- }
1368
-
1369
- if (groupEventData || isBATCH_MODE) {
1370
- log(`importing group events to mixpanel...\n`);
1371
- let groupEventDataToImport = clone(groupEventData);
1372
- if (isBATCH_MODE) {
1373
- const writeDir = groupEventData.getWriteDir();
1374
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1375
- groupEventDataToImport = files.filter(f => f.includes('-GROUP-EVENTS-'));
1376
- }
1377
- const imported = await mp(creds, groupEventDataToImport, {
1378
- recordType: "event",
1379
- ...commonOpts,
1380
- strict: false
1381
- });
1382
- log(`\tsent ${comma(imported.success)} group events\n`);
1383
- importResults.groupEvents = imported;
1384
- }
1385
- const { serviceAccount, projectId, serviceSecret } = config;
1386
- if (serviceAccount && projectId && serviceSecret) {
1387
- if (scdTableData || isBATCH_MODE) {
1388
- log(`importing SCD data to mixpanel...\n`);
1389
- for (const scdEntity of scdTableData) {
1390
- const scdKey = scdEntity?.scdKey;
1391
- log(`importing ${scdKey} SCD data to mixpanel...\n`);
1392
- let scdDataToImport = clone(scdEntity);
1393
- if (isBATCH_MODE) {
1394
- const writeDir = scdEntity.getWriteDir();
1395
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
1396
- scdDataToImport = files.filter(f => f.includes(`-SCD-${scdKey}`));
1397
- }
1398
-
1399
- /** @type {import('mixpanel-import').Options} */
1400
- const options = {
1401
- recordType: "scd",
1402
- scdKey,
1403
- scdType: scdEntity.dataType,
1404
- scdLabel: `${scdKey}-scd`,
1405
- ...commonOpts,
1406
- };
1407
- if (scdEntity.entityType !== "user") options.groupKey = scdEntity.entityType;
1408
- const imported = await mp(
1409
- {
1410
- token,
1411
- acct: serviceAccount,
1412
- pass: serviceSecret,
1413
- project: projectId
1414
- },
1415
- scdDataToImport,
1416
- // @ts-ignore
1417
- options);
1418
- log(`\tsent ${comma(imported.success)} ${scdKey} SCD data\n`);
1419
- importResults[`${scdKey}_scd`] = imported;
1420
- }
1421
- }
1422
- }
1423
-
1424
- //if we are in batch mode, we need to delete the files
1425
- if (!writeToDisk && isBATCH_MODE) {
1426
- const writeDir = eventData?.getWriteDir() || userProfilesData?.getWriteDir();
1427
- const listDir = await ls(writeDir.split(path.basename(writeDir)).join(""));
1428
- const files = listDir.filter(f => f.includes('-EVENTS-') || f.includes('-USERS-') || f.includes('-AD-SPEND-') || f.includes('-GROUPS-') || f.includes('-GROUP-EVENTS-'));
1429
- for (const file of files) {
1430
- await rm(file);
364
+ if (isCLI || config.verbose) {
365
+ console.log(`📊 Chart generated: ${chartPath}`);
366
+ } else {
367
+ sLog("Chart generated", { path: chartPath });
1431
368
  }
1432
369
  }
1433
- return importResults;
1434
370
  }
1435
371
 
1436
- /*
1437
- ----
1438
- META
1439
- ----
1440
- */
1441
-
1442
-
1443
372
  /**
1444
- * ensures that the config is valid and has all the necessary fields
1445
- * also adds some defaults
1446
- * @param {Config} config
373
+ * Flush all storage containers to disk
374
+ * @param {import('./types').Storage} storage - Storage containers
375
+ * @param {import('./types').Dungeon} config - Configuration object
1447
376
  */
1448
- function validateDungeonConfig(config) {
1449
- const chance = u.getChance();
1450
- let {
1451
- seed,
1452
- numEvents = 100_000,
1453
- numUsers = 1000,
1454
- numDays = 30,
1455
- epochStart = 0,
1456
- epochEnd = dayjs().unix(),
1457
- events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }],
1458
- superProps = { luckyNumber: [2, 2, 4, 4, 42, 42, 42, 2, 2, 4, 4, 42, 42, 42, 420] },
1459
- funnels = [],
1460
- userProps = {
1461
- spiritAnimal: chance.animal.bind(chance),
1462
- },
1463
- scdProps = {},
1464
- mirrorProps = {},
1465
- groupKeys = [],
1466
- groupProps = {},
1467
- lookupTables = [],
1468
- hasAnonIds = false,
1469
- hasSessionIds = false,
1470
- format = "csv",
1471
- token = null,
1472
- region = "US",
1473
- writeToDisk = false,
1474
- verbose = false,
1475
- makeChart = false,
1476
- soup = {},
1477
- hook = (record) => record,
1478
- hasAdSpend = false,
1479
- hasCampaigns = false,
1480
- hasLocation = false,
1481
- hasAvatar = false,
1482
- isAnonymous = false,
1483
- hasBrowser = false,
1484
- hasAndroidDevices = false,
1485
- hasDesktopDevices = false,
1486
- hasIOSDevices = false,
1487
- alsoInferFunnels = false,
1488
- name = "",
1489
- batchSize = 500_000,
1490
- concurrency = 500
1491
- } = config;
1492
-
1493
- //ensuring default for deep objects
1494
- if (!config.superProps) config.superProps = superProps;
1495
- if (!config.userProps || Object.keys(config?.userProps)) config.userProps = userProps;
1496
-
1497
- //setting up "TIME"
1498
- if (epochStart && !numDays) numDays = dayjs.unix(epochEnd).diff(dayjs.unix(epochStart), "day");
1499
- if (!epochStart && numDays) epochStart = dayjs.unix(epochEnd).subtract(numDays, "day").unix();
1500
- if (epochStart && numDays) { } //noop
1501
- if (!epochStart && !numDays) debugger; //never happens
1502
-
1503
- config.simulationName = name || makeName();
1504
- config.name = config.simulationName;
1505
-
1506
- //events
1507
- if (!events || !events.length) events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }];
1508
- // @ts-ignore
1509
- if (typeof events[0] === "string") events = events.map(e => ({ event: e }));
1510
-
1511
- //max batch size
1512
- if (batchSize > 0) BATCH_SIZE = batchSize;
377
+ async function flushStorageToDisk(storage, config) {
378
+ if (isCLI || config.verbose) {
379
+ console.log('\n💾 Writing data to disk...');
380
+ }
1513
381
 
1514
- // funnels
382
+ const flushPromises = [];
1515
383
 
1516
- // FUNNEL INFERENCE
1517
- // if (!funnels || !funnels.length) {
1518
- // funnels = inferFunnels(events);
1519
- // }
384
+ // Flush single HookedArray containers
385
+ if (storage.eventData?.flush) flushPromises.push(storage.eventData.flush());
386
+ if (storage.userProfilesData?.flush) flushPromises.push(storage.userProfilesData.flush());
387
+ if (storage.adSpendData?.flush) flushPromises.push(storage.adSpendData.flush());
388
+ if (storage.mirrorEventData?.flush) flushPromises.push(storage.mirrorEventData.flush());
389
+ if (storage.groupEventData?.flush) flushPromises.push(storage.groupEventData.flush());
1520
390
 
1521
- if (alsoInferFunnels) {
1522
- const inferredFunnels = inferFunnels(events);
1523
- funnels = [...funnels, ...inferredFunnels];
1524
- }
391
+ // Flush arrays of HookedArrays
392
+ [storage.scdTableData, storage.groupProfilesData, storage.lookupTableData].forEach(arrayOfContainers => {
393
+ if (Array.isArray(arrayOfContainers)) {
394
+ arrayOfContainers.forEach(container => {
395
+ if (container?.flush) flushPromises.push(container.flush());
396
+ });
397
+ }
398
+ });
1525
399
 
400
+ await Promise.all(flushPromises);
1526
401
 
1527
- const eventContainedInFunnels = Array.from(funnels.reduce((acc, f) => {
1528
- const events = f.sequence;
1529
- events.forEach(event => acc.add(event));
1530
- return acc;
1531
- }, new Set()));
1532
-
1533
- const eventsNotInFunnels = events
1534
- .filter(e => !e.isFirstEvent)
1535
- .filter(e => !eventContainedInFunnels.includes(e.event)).map(e => e.event);
1536
- if (eventsNotInFunnels.length) {
1537
- // const biggestWeight = funnels.reduce((acc, f) => {
1538
- // if (f.weight > acc) return f.weight;
1539
- // return acc;
1540
- // }, 0);
1541
- // const smallestWeight = funnels.reduce((acc, f) => {
1542
- // if (f.weight < acc) return f.weight;
1543
- // return acc;
1544
- // }, 0);
1545
- // const weight = u.integer(smallestWeight, biggestWeight) * 2;
1546
-
1547
- const sequence = u.shuffleArray(eventsNotInFunnels.flatMap(event => {
1548
- const evWeight = config.events.find(e => e.event === event)?.weight || 1;
1549
- return Array(evWeight).fill(event);
1550
- }));
1551
-
1552
-
1553
-
1554
- funnels.push({
1555
- sequence,
1556
- conversionRate: 50,
1557
- order: 'random',
1558
- timeToConvert: 24 * 14,
1559
- requireRepeats: false,
1560
- });
402
+ if (isCLI || config.verbose) {
403
+ console.log('✅ Data flushed to disk successfully');
1561
404
  }
1562
-
1563
- config.concurrency = concurrency;
1564
- config.funnels = funnels;
1565
- config.batchSize = batchSize;
1566
- config.seed = seed;
1567
- config.numEvents = numEvents;
1568
- config.numUsers = numUsers;
1569
- config.numDays = numDays;
1570
- config.epochStart = epochStart;
1571
- config.epochEnd = epochEnd;
1572
- config.events = events;
1573
- config.superProps = superProps;
1574
- config.funnels = funnels;
1575
- config.userProps = userProps;
1576
- config.scdProps = scdProps;
1577
- config.mirrorProps = mirrorProps;
1578
- config.groupKeys = groupKeys;
1579
- config.groupProps = groupProps;
1580
- config.lookupTables = lookupTables;
1581
- config.hasAnonIds = hasAnonIds;
1582
- config.hasSessionIds = hasSessionIds;
1583
- config.format = format;
1584
- config.token = token;
1585
- config.region = region;
1586
- config.writeToDisk = writeToDisk;
1587
- config.verbose = verbose;
1588
- config.makeChart = makeChart;
1589
- config.soup = soup;
1590
- config.hook = hook;
1591
- config.hasAdSpend = hasAdSpend;
1592
- config.hasCampaigns = hasCampaigns;
1593
- config.hasLocation = hasLocation;
1594
- config.hasAvatar = hasAvatar;
1595
- config.isAnonymous = isAnonymous;
1596
- config.hasBrowser = hasBrowser;
1597
- config.hasAndroidDevices = hasAndroidDevices;
1598
- config.hasDesktopDevices = hasDesktopDevices;
1599
- config.hasIOSDevices = hasIOSDevices;
1600
-
1601
- //event validation
1602
- const validatedEvents = u.validateEventConfig(events);
1603
- events = validatedEvents;
1604
- config.events = validatedEvents;
1605
-
1606
- return config;
1607
405
  }
1608
406
 
1609
- /**
1610
- * our meta programming function which lets you mutate items as they are pushed into an array
1611
- * it also does batching and writing to disk
1612
- * it kind of is a class - as it produces new objects - but it's not a class
1613
- * @param {Object} arr
1614
- * @param {hookArrayOptions} opts
1615
- * @returns {Promise<hookArray>}
407
+ /**
408
+ * Extract file information from storage containers
409
+ * @param {import('./types').Storage} storage - Storage object
410
+ * @returns {string[]} Array of file paths
1616
411
  */
1617
- async function makeHookArray(arr = [], opts = {}) {
1618
- const { hook = a => a, type = "", filepath = "./defaultFile", format = "csv", concurrency = 1, ...rest } = opts;
1619
- const FILE_CONN = pLimit(concurrency); // concurrent file writes
1620
- let batch = 0;
1621
- let writeDir;
1622
- const dataFolder = path.resolve("./data");
1623
- if (existsSync(dataFolder)) writeDir = dataFolder;
1624
- else writeDir = path.resolve("./");
1625
-
1626
- // ! decide where to write the files in prod
1627
- if (NODE_ENV === "prod") {
1628
- writeDir = path.resolve(os.tmpdir());
1629
- }
1630
- if (typeof rest?.config?.writeToDisk === "string" && rest?.config?.writeToDisk?.startsWith('gs://')) {
1631
- writeDir = rest.config.writeToDisk;
1632
- }
1633
-
1634
- function getWritePath() {
1635
- if (isBATCH_MODE) {
1636
- if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}-part-${batch.toString()}.${format}`;
1637
- return path.join(writeDir, `${filepath}-part-${batch.toString()}.${format}`);
1638
- }
1639
- else {
1640
- if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}.${format}`;
1641
- return path.join(writeDir, `${filepath}.${format}`);
1642
- }
1643
- }
1644
-
1645
- function getWriteDir() {
1646
- return path.join(writeDir, `${filepath}.${format}`);
1647
- }
1648
-
1649
- async function transformThenPush(item, meta) {
1650
- if (item === null || item === undefined) return false;
1651
- if (typeof item === 'object' && Object.keys(item).length === 0) return false;
1652
- const allMetaData = { ...rest, ...meta };
1653
- if (Array.isArray(item)) {
1654
- for (const i of item) {
1655
- try {
1656
- const enriched = await hook(i, type, allMetaData);
1657
- if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
1658
- else arr.push(enriched);
1659
- } catch (e) {
1660
- console.error(`\n\nyour hook had an error\n\n`, e);
1661
- arr.push(i);
412
+ function extractFileInfo(storage) {
413
+ const files = [];
414
+
415
+ Object.values(storage).forEach(container => {
416
+ if (Array.isArray(container)) {
417
+ container.forEach(subContainer => {
418
+ if (subContainer?.getWritePath) {
419
+ files.push(subContainer.getWritePath());
1662
420
  }
1663
- }
1664
- } else {
1665
- try {
1666
- const enriched = await hook(item, type, allMetaData);
1667
- if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
1668
- else arr.push(enriched);
1669
- } catch (e) {
1670
- console.error(`\n\nyour hook had an error\n\n`, e);
1671
- arr.push(item);
1672
- }
1673
- }
1674
-
1675
- if (arr.length > BATCH_SIZE) {
1676
- isBATCH_MODE = true;
1677
- batch++;
1678
- const writePath = getWritePath();
1679
- const writeResult = await FILE_CONN(() => writeToDisk(arr, { writePath }));
1680
- return writeResult;
1681
- } else {
1682
- return Promise.resolve(false);
1683
- }
1684
- }
1685
-
1686
- async function writeToDisk(data, options) {
1687
- const { writePath } = options;
1688
- let writeResult;
1689
- if (VERBOSE) log(`\n\n\twriting ${writePath}\n\n`);
1690
- switch (format) {
1691
- case "csv":
1692
- writeResult = await u.streamCSV(writePath, data);
1693
- break;
1694
- case "json":
1695
- writeResult = await u.streamJSON(writePath, data);
1696
- break;
1697
- default:
1698
- throw new Error(`format ${format} is not supported`);
1699
- }
1700
- if (isBATCH_MODE) data.length = 0;
1701
- return writeResult;
1702
-
1703
- }
1704
-
1705
- async function flush() {
1706
- if (arr.length > 0) {
1707
- batch++;
1708
- const writePath = getWritePath();
1709
- await FILE_CONN(() => writeToDisk(arr, { writePath }));
1710
- if (isBATCH_MODE) arr.length = 0; // free up memory for batch mode
421
+ });
422
+ } else if (container?.getWritePath) {
423
+ files.push(container.getWritePath());
1711
424
  }
1712
- }
1713
-
1714
- const enrichedArray = arr;
1715
-
1716
- enrichedArray.hookPush = transformThenPush;
1717
- enrichedArray.flush = flush;
1718
- enrichedArray.getWriteDir = getWriteDir;
1719
- enrichedArray.getWritePath = getWritePath;
1720
-
1721
- for (const key in rest) {
1722
- enrichedArray[key.toString()] = rest[key];
1723
- }
425
+ });
1724
426
 
1725
- return enrichedArray;
427
+ return files;
1726
428
  }
1727
429
 
1728
-
1729
430
  /**
1730
- * create funnels out of random events
1731
- * @param {EventConfig[]} events
431
+ * Extract data from storage containers, preserving array structure for groups/lookups/SCDs
432
+ * @param {import('./types').Storage} storage - Storage object
433
+ * @returns {object} Extracted data in Result format
1732
434
  */
1733
- function inferFunnels(events) {
1734
- const createdFunnels = [];
1735
- const firstEvents = events.filter((e) => e.isFirstEvent).map((e) => e.event);
1736
- const usageEvents = events.filter((e) => !e.isFirstEvent).map((e) => e.event);
1737
- const numFunnelsToCreate = Math.ceil(usageEvents.length);
1738
- /** @type {Funnel} */
1739
- const funnelTemplate = {
1740
- sequence: [],
1741
- conversionRate: 50,
1742
- order: 'sequential',
1743
- requireRepeats: false,
1744
- props: {},
1745
- timeToConvert: 1,
1746
- isFirstFunnel: false,
1747
- weight: 1
435
+ function extractStorageData(storage) {
436
+ return {
437
+ eventData: storage.eventData || [],
438
+ mirrorEventData: storage.mirrorEventData || [],
439
+ userProfilesData: storage.userProfilesData || [],
440
+ adSpendData: storage.adSpendData || [],
441
+ // Keep arrays of HookedArrays as separate arrays (don't flatten)
442
+ scdTableData: storage.scdTableData || [],
443
+ groupProfilesData: storage.groupProfilesData || [],
444
+ lookupTableData: storage.lookupTableData || []
1748
445
  };
1749
- if (firstEvents.length) {
1750
- for (const event of firstEvents) {
1751
- createdFunnels.push({ ...clone(funnelTemplate), sequence: [event], isFirstFunnel: true, conversionRate: 100 });
1752
- }
1753
- }
1754
-
1755
- //at least one funnel with all usage events
1756
- createdFunnels.push({ ...clone(funnelTemplate), sequence: usageEvents });
1757
-
1758
- //for the rest, make random funnels
1759
- followUpFunnels: for (let i = 1; i < numFunnelsToCreate; i++) {
1760
- /** @type {Funnel} */
1761
- const funnel = { ...clone(funnelTemplate) };
1762
- funnel.conversionRate = u.integer(25, 75);
1763
- funnel.timeToConvert = u.integer(1, 10);
1764
- funnel.weight = u.integer(1, 10);
1765
- const sequence = u.shuffleArray(usageEvents).slice(0, u.integer(2, usageEvents.length));
1766
- funnel.sequence = sequence;
1767
- funnel.order = 'random';
1768
- createdFunnels.push(funnel);
1769
- }
1770
-
1771
- return createdFunnels;
1772
-
1773
446
  }
1774
447
 
1775
-
1776
- /*
1777
- ----
1778
- CLI
1779
- ----
1780
- */
1781
-
1782
- if (NODE_ENV !== "prod") {
1783
- if (require.main === module) {
1784
- isCLI = true;
1785
- const args = /** @type {Config} */ (getCliParams());
1786
- let { token, seed, format, numDays, numUsers, numEvents, region, writeToDisk, complex = false, hasSessionIds, hasAnonIds } = args;
1787
- const suppliedConfig = args._[0];
1788
-
1789
- //if the user specifies an separate config file
1790
- let config = null;
1791
- if (suppliedConfig) {
1792
- console.log(`using ${suppliedConfig} for data\n`);
1793
- config = require(path.resolve(suppliedConfig));
1794
- }
1795
- else {
1796
- if (complex) {
1797
- console.log(`... using default COMPLEX configuration [everything] ...\n`);
1798
- console.log(`... for more simple data, don't use the --complex flag ...\n`);
1799
- console.log(`... or specify your own js config file (see docs or --help) ...\n`);
1800
- config = require(path.resolve(__dirname, "./dungeons/complex.js"));
1801
- }
1802
- else {
1803
- console.log(`... using default SIMPLE configuration [events + users] ...\n`);
1804
- console.log(`... for more complex data, use the --complex flag ...\n`);
1805
- config = require(path.resolve(__dirname, "./dungeons/simple.js"));
1806
- }
1807
- }
1808
-
1809
- //override config with cli params
1810
- if (token) config.token = token;
1811
- if (seed) config.seed = seed;
1812
- if (format === "csv" && config.format === "json") format = "json";
1813
- if (format) config.format = format;
1814
- if (numDays) config.numDays = numDays;
1815
- if (numUsers) config.numUsers = numUsers;
1816
- if (numEvents) config.numEvents = numEvents;
1817
- if (region) config.region = region;
1818
- if (writeToDisk) config.writeToDisk = writeToDisk;
1819
- if (writeToDisk === 'false') config.writeToDisk = false;
1820
- if (hasSessionIds) config.hasSessionIds = hasSessionIds;
1821
- if (hasAnonIds) config.hasAnonIds = hasAnonIds;
1822
- config.verbose = true;
1823
-
1824
- main(config)
1825
- .then((data) => {
1826
- log(`-----------------SUMMARY-----------------`);
1827
- const d = { success: 0, bytes: 0 };
1828
- const darr = [d];
1829
- const { events = d, groups = darr, users = d } = data?.importResults || {};
1830
- const files = data.files;
1831
- const folder = files?.[0]?.split(path.basename(files?.[0]))?.shift() || "./";
1832
- const groupBytes = groups.reduce((acc, group) => {
1833
- return acc + group.bytes;
1834
- }, 0);
1835
- const groupSuccess = groups.reduce((acc, group) => {
1836
- return acc + group.success;
1837
- }, 0);
1838
- const bytes = events.bytes + groupBytes + users.bytes;
1839
- const stats = {
1840
- events: comma(events.success || 0),
1841
- users: comma(users.success || 0),
1842
- groups: comma(groupSuccess || 0),
1843
- bytes: bytesHuman(bytes || 0),
1844
- };
1845
- if (bytes > 0) console.table(stats);
1846
- if (Object.keys(data?.importResults || {}).length) {
1847
- log(`\nlog written to log.json\n`);
1848
- writeFileSync(path.resolve(folder, "log.json"), JSON.stringify(data?.importResults, null, 2));
448
+ // CLI execution
449
+ if (isCLI) {
450
+ (async () => {
451
+ const cliConfig = getCliParams();
452
+
453
+ // Load dungeon config if --complex or --simple flags are used
454
+ let finalConfig = cliConfig;
455
+ if (cliConfig.complex) {
456
+ const complexConfig = await import('./dungeons/complex.js');
457
+ finalConfig = { ...complexConfig.default, ...cliConfig };
458
+ } else if (cliConfig.simple) {
459
+ const simpleConfig = await import('./dungeons/simple.js');
460
+ finalConfig = { ...simpleConfig.default, ...cliConfig };
461
+ }
462
+
463
+ main(finalConfig)
464
+ .then(result => {
465
+ console.log(`📊 Generated ${(result.eventCount || 0).toLocaleString()} events for ${(result.userCount || 0).toLocaleString()} users`);
466
+ console.log(`⏱️ Total time: ${result.time?.human || 'unknown'}`);
467
+ if (result.files?.length) {
468
+ console.log(`📁 Files written: ${result.files.length}`);
469
+ if (cliConfig.verbose) {
470
+ result.files.forEach(file => console.log(` ${file}`));
471
+ }
1849
472
  }
1850
- // log(" " + files?.flat().join("\n "));
1851
- log(`\n----------------SUMMARY-----------------\n\n\n`);
473
+ console.log(`\n✅ Job completed successfully!`);
474
+ process.exit(0);
1852
475
  })
1853
- .catch((e) => {
1854
- log(`------------------ERROR------------------`);
1855
- console.error(e);
1856
- log(`------------------ERROR------------------`);
1857
- debugger;
1858
- })
1859
- .finally(() => {
1860
- log("enjoy your data! :)");
476
+ .catch(error => {
477
+ console.error(`\n❌ Job failed: ${error.message}`);
478
+ if (cliConfig.verbose) {
479
+ console.error(error.stack);
480
+ }
481
+ process.exit(1);
1861
482
  });
1862
- } else {
1863
- main.generators = { makeEvent, makeFunnel, makeProfile, makeSCD, makeAdSpend, makeMirror };
1864
- main.orchestrators = { userLoop, validateDungeonConfig, sendToMixpanel };
1865
- main.meta = { inferFunnels, hookArray: makeHookArray };
1866
- module.exports = main;
1867
- }
483
+ })();
1868
484
  }
1869
485
 
486
+ // Cloud Functions setup
487
+ functions.http('entry', async (req, res) => {
488
+ await handleCloudFunctionEntry(req, res, main);
489
+ });
1870
490
 
491
+ // ES Module export
492
+ export default main;
1871
493
 
1872
- /*
1873
- ----
1874
- HELPERS
1875
- ----
1876
- */
1877
-
1878
- function log(...args) {
1879
- const cwd = process.cwd(); // Get the current working directory
1880
-
1881
- for (let i = 0; i < args.length; i++) {
1882
- // Replace occurrences of the current working directory with "./" in string arguments
1883
- if (typeof args[i] === 'string') {
1884
- args[i] = args[i].replace(new RegExp(cwd, 'g'), ".");
1885
- }
1886
- }
1887
- if (VERBOSE) console.log(...args);
1888
- }
1889
-
1890
- function track(name, props, ...rest) {
1891
- if (process.env.NODE_ENV === 'test') return;
1892
- metrics(name, props, ...rest);
1893
- }
1894
-
1895
-
1896
- /** @typedef {import('./types.js').Dungeon} Config */
1897
- /** @typedef {import('./types.js').AllData} AllData */
1898
- /** @typedef {import('./types.js').EventConfig} EventConfig */
1899
- /** @typedef {import('./types.js').Funnel} Funnel */
1900
- /** @typedef {import('./types.js').Person} Person */
1901
- /** @typedef {import('./types.js').SCDSchema} SCDSchema */
1902
- /** @typedef {import('./types.js').UserProfile} UserProfile */
1903
- /** @typedef {import('./types.js').EventSchema} EventSchema */
1904
- /** @typedef {import('./types.js').Storage} Storage */
1905
- /** @typedef {import('./types.js').Result} Result */
1906
- /** @typedef {import('./types.js').ValueValid} ValueValid */
1907
- /** @typedef {import('./types.js').HookedArray} hookArray */
1908
- /** @typedef {import('./types.js').hookArrayOptions} hookArrayOptions */
1909
- /** @typedef {import('./types.js').GroupProfileSchema} GroupProfile */
1910
- /** @typedef {import('./types.js').SCDProp} SCDProp */
494
+ // CommonJS compatibility
495
+ if (typeof module !== 'undefined' && module.exports) {
496
+ module.exports = main;
497
+ }