make-mp-data 2.0.22 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dungeons/ai-chat-analytics-ed.js +274 -0
- package/dungeons/business.js +0 -1
- package/dungeons/complex.js +0 -1
- package/dungeons/experiments.js +0 -1
- package/dungeons/gaming.js +47 -14
- package/dungeons/media.js +5 -6
- package/dungeons/mil.js +296 -0
- package/dungeons/money2020-ed-also.js +277 -0
- package/dungeons/money2020-ed.js +579 -0
- package/dungeons/sanity.js +0 -1
- package/dungeons/scd.js +0 -1
- package/dungeons/simple.js +57 -18
- package/dungeons/student-teacher.js +0 -1
- package/dungeons/text-generation.js +706 -0
- package/dungeons/userAgent.js +1 -2
- package/entry.js +4 -0
- package/index.js +63 -38
- package/lib/cli/cli.js +7 -8
- package/lib/core/config-validator.js +11 -13
- package/lib/core/context.js +13 -1
- package/lib/core/storage.js +45 -13
- package/lib/generators/adspend.js +1 -1
- package/lib/generators/events.js +18 -17
- package/lib/generators/funnels.js +293 -240
- package/lib/generators/text-bak-old.js +1121 -0
- package/lib/generators/text.js +1173 -0
- package/lib/orchestrators/mixpanel-sender.js +1 -1
- package/lib/templates/abbreviated.d.ts +13 -3
- package/lib/templates/defaults.js +311 -169
- package/lib/templates/hooks-instructions.txt +434 -0
- package/lib/templates/phrases-bak.js +925 -0
- package/lib/templates/phrases.js +2066 -0
- package/lib/templates/{instructions.txt → schema-instructions.txt} +78 -1
- package/lib/templates/scratch-dungeon-template.js +1 -1
- package/lib/templates/textQuickTest.js +172 -0
- package/lib/utils/ai.js +51 -2
- package/lib/utils/utils.js +145 -7
- package/package.json +8 -5
- package/types.d.ts +322 -7
- package/lib/utils/chart.js +0 -206
package/dungeons/userAgent.js
CHANGED
|
@@ -14,7 +14,7 @@ const days = 30;
|
|
|
14
14
|
|
|
15
15
|
/** @type {Config} */
|
|
16
16
|
const config = {
|
|
17
|
-
token: "
|
|
17
|
+
token: "",
|
|
18
18
|
seed: SEED,
|
|
19
19
|
numDays: days,
|
|
20
20
|
numEvents: num_users * 100,
|
|
@@ -33,7 +33,6 @@ const config = {
|
|
|
33
33
|
hasAdSpend: true,
|
|
34
34
|
|
|
35
35
|
hasAvatar: true,
|
|
36
|
-
makeChart: false,
|
|
37
36
|
|
|
38
37
|
batchSize: 500_000,
|
|
39
38
|
concurrency: 500,
|
package/entry.js
CHANGED
|
@@ -27,6 +27,7 @@ import getCliParams from './lib/cli/cli.js';
|
|
|
27
27
|
const simpleConfig = await import('./dungeons/simple.js');
|
|
28
28
|
finalConfig = { ...simpleConfig.default, ...cliConfig };
|
|
29
29
|
}
|
|
30
|
+
|
|
30
31
|
|
|
31
32
|
|
|
32
33
|
const result = await main(finalConfig);
|
|
@@ -35,9 +36,12 @@ import getCliParams from './lib/cli/cli.js';
|
|
|
35
36
|
const recordsPerSecond = result.eventCount / result.time.delta * 1000;
|
|
36
37
|
console.log(`⚡ Records per second: ${recordsPerSecond.toFixed(2)}`);
|
|
37
38
|
|
|
39
|
+
// @ts-ignore
|
|
38
40
|
if (result.errors?.length) {
|
|
41
|
+
// @ts-ignore
|
|
39
42
|
console.error(`\n❗ Errors encountered: ${result.errors.length}`);
|
|
40
43
|
if (cliConfig.verbose) {
|
|
44
|
+
// @ts-ignore
|
|
41
45
|
result.errors.forEach(err => console.error(` ${err}`));
|
|
42
46
|
}
|
|
43
47
|
} else {
|
package/index.js
CHANGED
|
@@ -29,16 +29,13 @@ import { makeMirror } from './lib/generators/mirror.js';
|
|
|
29
29
|
import { makeGroupProfile, makeProfile } from './lib/generators/profiles.js';
|
|
30
30
|
|
|
31
31
|
// Utilities
|
|
32
|
-
import { generateLineChart } from './lib/utils/chart.js';
|
|
33
32
|
|
|
34
33
|
// External dependencies
|
|
35
34
|
import dayjs from "dayjs";
|
|
36
35
|
import utc from "dayjs/plugin/utc.js";
|
|
37
36
|
import functions from '@google-cloud/functions-framework';
|
|
38
37
|
import { timer, sLog } from 'ak-tools';
|
|
39
|
-
import
|
|
40
|
-
import path from 'path';
|
|
41
|
-
import { fileURLToPath } from 'url';
|
|
38
|
+
import { existsSync } from 'fs';
|
|
42
39
|
|
|
43
40
|
// Initialize dayjs and time constants
|
|
44
41
|
dayjs.extend(utc);
|
|
@@ -66,7 +63,6 @@ function displayConfigurationSummary(config) {
|
|
|
66
63
|
if (config.hasAnonIds) features.push('anonymous IDs');
|
|
67
64
|
if (config.hasSessionIds) features.push('session IDs');
|
|
68
65
|
if (config.alsoInferFunnels) features.push('funnel inference');
|
|
69
|
-
if (config.makeChart) features.push('chart generation');
|
|
70
66
|
if (config.writeToDisk) features.push('disk output');
|
|
71
67
|
|
|
72
68
|
if (features.length > 0) {
|
|
@@ -100,7 +96,10 @@ function displayConfigurationSummary(config) {
|
|
|
100
96
|
|
|
101
97
|
// Group analytics
|
|
102
98
|
if (config.groupKeys && config.groupKeys.length > 0) {
|
|
103
|
-
const groups = config.groupKeys.map((
|
|
99
|
+
const groups = config.groupKeys.map((group) => {
|
|
100
|
+
const [key, count] = Array.isArray(group) ? group : [group, 0];
|
|
101
|
+
return `${count} ${key}s`;
|
|
102
|
+
}).join(', ');
|
|
104
103
|
console.log(`👥 Groups: ${groups}`);
|
|
105
104
|
}
|
|
106
105
|
|
|
@@ -189,15 +188,12 @@ async function main(config) {
|
|
|
189
188
|
|
|
190
189
|
// ! DATA GENERATION ENDS HERE
|
|
191
190
|
|
|
192
|
-
// Step 10:
|
|
193
|
-
if (validatedConfig.
|
|
194
|
-
await
|
|
191
|
+
// Step 10: flush lookup tables to disk (always as CSVs)
|
|
192
|
+
if (validatedConfig.writeToDisk) {
|
|
193
|
+
await flushLookupTablesToDisk(storage, validatedConfig);
|
|
195
194
|
}
|
|
196
195
|
|
|
197
|
-
// Step
|
|
198
|
-
await flushLookupTablesToDisk(storage, validatedConfig);
|
|
199
|
-
|
|
200
|
-
// Step 11b: Flush other storage containers to disk (if writeToDisk enabled)
|
|
196
|
+
// Step 11: Flush other storage containers to disk (if writeToDisk enabled)
|
|
201
197
|
if (validatedConfig.writeToDisk) {
|
|
202
198
|
await flushStorageToDisk(storage, validatedConfig);
|
|
203
199
|
}
|
|
@@ -217,7 +213,7 @@ async function main(config) {
|
|
|
217
213
|
return {
|
|
218
214
|
...extractedData,
|
|
219
215
|
importResults,
|
|
220
|
-
files: extractFileInfo(storage),
|
|
216
|
+
files: await extractFileInfo(storage, validatedConfig),
|
|
221
217
|
time: { start, end, delta, human },
|
|
222
218
|
operations: context.getOperations(),
|
|
223
219
|
eventCount: context.getEventCount(),
|
|
@@ -416,28 +412,6 @@ async function generateGroupSCDs(context) {
|
|
|
416
412
|
}
|
|
417
413
|
}
|
|
418
414
|
|
|
419
|
-
/**
|
|
420
|
-
* Generate charts for data visualization
|
|
421
|
-
* @param {Context} context - Context object
|
|
422
|
-
*/
|
|
423
|
-
async function generateCharts(context) {
|
|
424
|
-
const { config, storage } = context;
|
|
425
|
-
|
|
426
|
-
if (config.makeChart && storage.eventData?.length > 0) {
|
|
427
|
-
const chartPath = typeof config.makeChart === 'string'
|
|
428
|
-
? config.makeChart
|
|
429
|
-
: `./${config.simulationName}-timeline`;
|
|
430
|
-
|
|
431
|
-
await generateLineChart(storage.eventData, undefined, chartPath);
|
|
432
|
-
|
|
433
|
-
if (context.isCLI() || config.verbose) {
|
|
434
|
-
console.log(`📊 Chart generated: ${chartPath}`);
|
|
435
|
-
} else {
|
|
436
|
-
sLog("Chart generated", { path: chartPath });
|
|
437
|
-
}
|
|
438
|
-
}
|
|
439
|
-
}
|
|
440
|
-
|
|
441
415
|
/**
|
|
442
416
|
* Flush lookup tables to disk (always runs, regardless of writeToDisk setting)
|
|
443
417
|
* @param {import('./types').Storage} storage - Storage containers
|
|
@@ -502,11 +476,13 @@ async function flushStorageToDisk(storage, config) {
|
|
|
502
476
|
/**
|
|
503
477
|
* Extract file information from storage containers
|
|
504
478
|
* @param {import('./types').Storage} storage - Storage object
|
|
505
|
-
* @
|
|
479
|
+
* @param {import('./types').Dungeon} config - Configuration object
|
|
480
|
+
* @returns {Promise<string[]>} Array of file paths
|
|
506
481
|
*/
|
|
507
|
-
function extractFileInfo(storage) {
|
|
482
|
+
async function extractFileInfo(storage, config) {
|
|
508
483
|
const files = [];
|
|
509
484
|
|
|
485
|
+
// Try to get paths from containers first
|
|
510
486
|
Object.values(storage).forEach(container => {
|
|
511
487
|
if (Array.isArray(container)) {
|
|
512
488
|
container.forEach(subContainer => {
|
|
@@ -519,6 +495,55 @@ function extractFileInfo(storage) {
|
|
|
519
495
|
}
|
|
520
496
|
});
|
|
521
497
|
|
|
498
|
+
// If no files found from containers and writeToDisk is enabled, scan the data directory
|
|
499
|
+
if (files.length === 0 && config.writeToDisk) {
|
|
500
|
+
try {
|
|
501
|
+
const fs = await import('fs');
|
|
502
|
+
const path = await import('path');
|
|
503
|
+
|
|
504
|
+
let dataDir = path.resolve("./data");
|
|
505
|
+
if (!fs.existsSync(dataDir)) {
|
|
506
|
+
dataDir = path.resolve("./");
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
if (fs.existsSync(dataDir)) {
|
|
510
|
+
const allFiles = fs.readdirSync(dataDir);
|
|
511
|
+
const simulationName = config.name;
|
|
512
|
+
|
|
513
|
+
// Filter files that match our patterns and were likely created by this run
|
|
514
|
+
const relevantFiles = allFiles.filter(file => {
|
|
515
|
+
// Skip system files
|
|
516
|
+
if (file.startsWith('.')) return false;
|
|
517
|
+
|
|
518
|
+
// If we have a simulation name, only include files with that prefix
|
|
519
|
+
if (simulationName && !file.startsWith(simulationName)) {
|
|
520
|
+
return false;
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
// Check for common patterns
|
|
524
|
+
const hasEventPattern = file.includes('-EVENTS.');
|
|
525
|
+
const hasUserPattern = file.includes('-USERS.');
|
|
526
|
+
const hasScdPattern = file.includes('-SCD.');
|
|
527
|
+
const hasGroupPattern = file.includes('-GROUPS.');
|
|
528
|
+
const hasLookupPattern = file.includes('-LOOKUP.');
|
|
529
|
+
const hasAdspendPattern = file.includes('-ADSPEND.');
|
|
530
|
+
const hasMirrorPattern = file.includes('-MIRROR.');
|
|
531
|
+
|
|
532
|
+
return hasEventPattern || hasUserPattern || hasScdPattern ||
|
|
533
|
+
hasGroupPattern || hasLookupPattern || hasAdspendPattern || hasMirrorPattern;
|
|
534
|
+
});
|
|
535
|
+
|
|
536
|
+
// Convert to full paths
|
|
537
|
+
relevantFiles.forEach(file => {
|
|
538
|
+
files.push(path.join(dataDir, file));
|
|
539
|
+
});
|
|
540
|
+
}
|
|
541
|
+
} catch (error) {
|
|
542
|
+
// If scanning fails, just return empty array
|
|
543
|
+
console.warn('Warning: Could not scan data directory for files:', error.message);
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
|
|
522
547
|
return files;
|
|
523
548
|
}
|
|
524
549
|
|
package/lib/cli/cli.js
CHANGED
|
@@ -160,13 +160,6 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
|
|
|
160
160
|
type: 'boolean',
|
|
161
161
|
coerce: boolCoerce
|
|
162
162
|
})
|
|
163
|
-
.option("makeChart", {
|
|
164
|
-
alias: 'mc',
|
|
165
|
-
demandOption: false,
|
|
166
|
-
describe: 'create a PNG chart from data',
|
|
167
|
-
type: 'boolean',
|
|
168
|
-
coerce: boolCoerce
|
|
169
|
-
})
|
|
170
163
|
.option("hasAdSpend", {
|
|
171
164
|
alias: 'ads',
|
|
172
165
|
demandOption: false,
|
|
@@ -223,6 +216,12 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
|
|
|
223
216
|
type: 'boolean',
|
|
224
217
|
coerce: boolCoerce
|
|
225
218
|
})
|
|
219
|
+
.option("name", {
|
|
220
|
+
alias: 'n',
|
|
221
|
+
demandOption: false,
|
|
222
|
+
describe: 'custom name for generated files (prefix)',
|
|
223
|
+
type: 'string'
|
|
224
|
+
})
|
|
226
225
|
|
|
227
226
|
.help()
|
|
228
227
|
.wrap(null)
|
|
@@ -236,7 +235,7 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
|
|
|
236
235
|
}
|
|
237
236
|
|
|
238
237
|
|
|
239
|
-
function boolCoerce(value
|
|
238
|
+
function boolCoerce(value) {
|
|
240
239
|
if (typeof value === 'boolean') return value;
|
|
241
240
|
if (typeof value === 'string') {
|
|
242
241
|
return value.toLowerCase() === 'true';
|
|
package/lib/core/config-validator.js
CHANGED
|
@@ -101,7 +101,6 @@ export function validateDungeonConfig(config) {
|
|
|
101
101
|
region = "US",
|
|
102
102
|
writeToDisk = false,
|
|
103
103
|
verbose = true,
|
|
104
|
-
makeChart = false,
|
|
105
104
|
soup = {},
|
|
106
105
|
hook = (record) => record,
|
|
107
106
|
hasAdSpend = false,
|
|
@@ -115,13 +114,13 @@ export function validateDungeonConfig(config) {
|
|
|
115
114
|
hasIOSDevices = false,
|
|
116
115
|
alsoInferFunnels = false,
|
|
117
116
|
name = "",
|
|
118
|
-
batchSize =
|
|
119
|
-
concurrency
|
|
117
|
+
batchSize = 2_500_000,
|
|
118
|
+
concurrency = 1
|
|
120
119
|
} = config;
|
|
121
120
|
|
|
122
|
-
//
|
|
123
|
-
if (concurrency === undefined || concurrency === null) {
|
|
124
|
-
concurrency =
|
|
121
|
+
// Allow concurrency override from config (default is now 1)
|
|
122
|
+
if (config.concurrency === undefined || config.concurrency === null) {
|
|
123
|
+
concurrency = 1;
|
|
125
124
|
}
|
|
126
125
|
|
|
127
126
|
// Ensure defaults for deep objects
|
|
@@ -136,9 +135,10 @@ export function validateDungeonConfig(config) {
|
|
|
136
135
|
throw new Error("Either epochStart or numDays must be provided");
|
|
137
136
|
}
|
|
138
137
|
|
|
139
|
-
//
|
|
140
|
-
|
|
141
|
-
|
|
138
|
+
// Use provided name if non-empty string, otherwise generate one
|
|
139
|
+
if (!name || name === "") {
|
|
140
|
+
name = makeName();
|
|
141
|
+
}
|
|
142
142
|
|
|
143
143
|
// Validate events
|
|
144
144
|
if (!events || !events.length) events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }];
|
|
@@ -195,7 +195,7 @@ export function validateDungeonConfig(config) {
|
|
|
195
195
|
const definedEvents = events.map(e => e.event);
|
|
196
196
|
const missingEvents = eventInFunnels.filter(event => !definedEvents.includes(event));
|
|
197
197
|
if (missingEvents.length) {
|
|
198
|
-
throw new Error(`Funnel sequences contain events that are not defined in the events config:\n${missingEvents.join(', ')}\nPlease ensure all events in funnel sequences are defined in the events array.`);
|
|
198
|
+
throw new Error(`Funnel sequences contain events that are not defined in the events config:\n\n${missingEvents.join(', ')}\n\nPlease ensure all events in funnel sequences are defined in the events array.`);
|
|
199
199
|
}
|
|
200
200
|
|
|
201
201
|
|
|
@@ -230,7 +230,6 @@ export function validateDungeonConfig(config) {
|
|
|
230
230
|
region,
|
|
231
231
|
writeToDisk,
|
|
232
232
|
verbose,
|
|
233
|
-
makeChart,
|
|
234
233
|
soup,
|
|
235
234
|
hook,
|
|
236
235
|
hasAdSpend,
|
|
@@ -242,8 +241,7 @@ export function validateDungeonConfig(config) {
|
|
|
242
241
|
hasAndroidDevices,
|
|
243
242
|
hasDesktopDevices,
|
|
244
243
|
hasIOSDevices,
|
|
245
|
-
|
|
246
|
-
name: config.name
|
|
244
|
+
name
|
|
247
245
|
};
|
|
248
246
|
|
|
249
247
|
return validatedConfig;
|
package/lib/core/context.js
CHANGED
|
@@ -40,6 +40,14 @@ function createDefaults(config, campaignData) {
|
|
|
40
40
|
const weighedBrowsers = u.weighArray(devices.browsers);
|
|
41
41
|
const weighedCampaigns = u.weighArray(campaignData);
|
|
42
42
|
|
|
43
|
+
// PERFORMANCE: Pre-compute device pools based on config to avoid rebuilding in makeEvent
|
|
44
|
+
const devicePools = {
|
|
45
|
+
android: config.hasAndroidDevices ? weighedAndroidDevices : [],
|
|
46
|
+
ios: config.hasIOSDevices ? weighedIOSDevices : [],
|
|
47
|
+
desktop: config.hasDesktopDevices ? weighedDesktopDevices : []
|
|
48
|
+
};
|
|
49
|
+
const allDevices = [...devicePools.android, ...devicePools.ios, ...devicePools.desktop];
|
|
50
|
+
|
|
43
51
|
return {
|
|
44
52
|
locationsUsers: () => weighedLocationsUsers,
|
|
45
53
|
locationsEvents: () => weighedLocationsEvents,
|
|
@@ -47,7 +55,11 @@ function createDefaults(config, campaignData) {
|
|
|
47
55
|
androidDevices: () => weighedAndroidDevices,
|
|
48
56
|
desktopDevices: () => weighedDesktopDevices,
|
|
49
57
|
browsers: () => weighedBrowsers,
|
|
50
|
-
campaigns: () => weighedCampaigns
|
|
58
|
+
campaigns: () => weighedCampaigns,
|
|
59
|
+
|
|
60
|
+
// PERFORMANCE: Pre-computed device pools
|
|
61
|
+
devicePools,
|
|
62
|
+
allDevices
|
|
51
63
|
};
|
|
52
64
|
}
|
|
53
65
|
|
package/lib/core/storage.js
CHANGED
|
@@ -65,13 +65,15 @@ export async function createHookArray(arr = [], opts) {
|
|
|
65
65
|
}
|
|
66
66
|
|
|
67
67
|
function getWritePath() {
|
|
68
|
+
const gzipSuffix = (config.gzip && !writeDir?.startsWith('gs://')) ? '.gz' : '';
|
|
69
|
+
|
|
68
70
|
if (isBatchMode) {
|
|
69
|
-
if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}-part-${batch.toString()}.${format}`;
|
|
70
|
-
return path.join(writeDir, `${filepath}-part-${batch.toString()}.${format}`);
|
|
71
|
+
if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}-part-${batch.toString()}.${format}${gzipSuffix}`;
|
|
72
|
+
return path.join(writeDir, `${filepath}-part-${batch.toString()}.${format}${gzipSuffix}`);
|
|
71
73
|
}
|
|
72
74
|
else {
|
|
73
|
-
if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}.${format}`;
|
|
74
|
-
return path.join(writeDir, `${filepath}.${format}`);
|
|
75
|
+
if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}.${format}${gzipSuffix}`;
|
|
76
|
+
return path.join(writeDir, `${filepath}.${format}${gzipSuffix}`);
|
|
75
77
|
}
|
|
76
78
|
}
|
|
77
79
|
|
|
@@ -153,12 +155,19 @@ export async function createHookArray(arr = [], opts) {
|
|
|
153
155
|
console.log(`\n\twriting ${writePath}\n`);
|
|
154
156
|
}
|
|
155
157
|
|
|
158
|
+
const streamOptions = {
|
|
159
|
+
gzip: config.gzip || false
|
|
160
|
+
};
|
|
161
|
+
|
|
156
162
|
switch (format) {
|
|
157
163
|
case "csv":
|
|
158
|
-
writeResult = await u.streamCSV(writePath, data);
|
|
164
|
+
writeResult = await u.streamCSV(writePath, data, streamOptions);
|
|
159
165
|
break;
|
|
160
166
|
case "json":
|
|
161
|
-
writeResult = await u.streamJSON(writePath, data);
|
|
167
|
+
writeResult = await u.streamJSON(writePath, data, streamOptions);
|
|
168
|
+
break;
|
|
169
|
+
case "parquet":
|
|
170
|
+
writeResult = await u.streamParquet(writePath, data, streamOptions);
|
|
162
171
|
break;
|
|
163
172
|
default:
|
|
164
173
|
throw new Error(`format ${format} is not supported`);
|
|
@@ -219,12 +228,15 @@ export class StorageManager {
|
|
|
219
228
|
async initializeContainers() {
|
|
220
229
|
const { config } = this.context;
|
|
221
230
|
|
|
231
|
+
// Validate configuration for potential data loss scenarios
|
|
232
|
+
this.validateConfiguration(config);
|
|
233
|
+
|
|
222
234
|
/** @type {Storage} */
|
|
223
235
|
const storage = {
|
|
224
236
|
eventData: await createHookArray([], {
|
|
225
237
|
hook: config.hook,
|
|
226
238
|
type: "event",
|
|
227
|
-
filepath: `${config.
|
|
239
|
+
filepath: `${config.name}-EVENTS`,
|
|
228
240
|
format: config.format || "csv",
|
|
229
241
|
concurrency: config.concurrency || 1,
|
|
230
242
|
context: this.context
|
|
@@ -233,7 +245,7 @@ export class StorageManager {
|
|
|
233
245
|
userProfilesData: await createHookArray([], {
|
|
234
246
|
hook: config.hook,
|
|
235
247
|
type: "user",
|
|
236
|
-
filepath: `${config.
|
|
248
|
+
filepath: `${config.name}-USERS`,
|
|
237
249
|
format: config.format || "csv",
|
|
238
250
|
concurrency: config.concurrency || 1,
|
|
239
251
|
context: this.context
|
|
@@ -242,7 +254,7 @@ export class StorageManager {
|
|
|
242
254
|
adSpendData: await createHookArray([], {
|
|
243
255
|
hook: config.hook,
|
|
244
256
|
type: "ad-spend",
|
|
245
|
-
filepath: `${config.
|
|
257
|
+
filepath: `${config.name}-ADSPEND`,
|
|
246
258
|
format: config.format || "csv",
|
|
247
259
|
concurrency: config.concurrency || 1,
|
|
248
260
|
context: this.context
|
|
@@ -255,7 +267,7 @@ export class StorageManager {
|
|
|
255
267
|
mirrorEventData: await createHookArray([], {
|
|
256
268
|
hook: config.hook,
|
|
257
269
|
type: "mirror",
|
|
258
|
-
filepath: `${config.
|
|
270
|
+
filepath: `${config.name}-MIRROR`,
|
|
259
271
|
format: config.format || "csv",
|
|
260
272
|
concurrency: config.concurrency || 1,
|
|
261
273
|
context: this.context
|
|
@@ -268,7 +280,7 @@ export class StorageManager {
|
|
|
268
280
|
const scdArray = await createHookArray([], {
|
|
269
281
|
hook: config.hook,
|
|
270
282
|
type: "scd",
|
|
271
|
-
filepath: `${config.
|
|
283
|
+
filepath: `${config.name}-${scdKey}-SCD`,
|
|
272
284
|
format: config.format || "csv",
|
|
273
285
|
concurrency: config.concurrency || 1,
|
|
274
286
|
context: this.context
|
|
@@ -284,7 +296,7 @@ export class StorageManager {
|
|
|
284
296
|
const groupArray = await createHookArray([], {
|
|
285
297
|
hook: config.hook,
|
|
286
298
|
type: "group",
|
|
287
|
-
filepath: `${config.
|
|
299
|
+
filepath: `${config.name}-${groupKey}-GROUPS`,
|
|
288
300
|
format: config.format || "csv",
|
|
289
301
|
concurrency: config.concurrency || 1,
|
|
290
302
|
context: this.context
|
|
@@ -300,7 +312,7 @@ export class StorageManager {
|
|
|
300
312
|
const lookupArray = await createHookArray([], {
|
|
301
313
|
hook: config.hook,
|
|
302
314
|
type: "lookup",
|
|
303
|
-
filepath: `${config.
|
|
315
|
+
filepath: `${config.name}-${lookupConfig.key}-LOOKUP`,
|
|
304
316
|
format: "csv", // Always force CSV for lookup tables
|
|
305
317
|
concurrency: config.concurrency || 1,
|
|
306
318
|
context: this.context
|
|
@@ -312,4 +324,24 @@ export class StorageManager {
|
|
|
312
324
|
|
|
313
325
|
return storage;
|
|
314
326
|
}
|
|
327
|
+
|
|
328
|
+
/**
|
|
329
|
+
* Validates configuration to prevent data loss scenarios
|
|
330
|
+
* @param {Object} config - Configuration object
|
|
331
|
+
*/
|
|
332
|
+
validateConfiguration(config) {
|
|
333
|
+
// Check for potential data loss scenario: writeToDisk=false with low batchSize
|
|
334
|
+
if (config.writeToDisk === false) {
|
|
335
|
+
const batchSize = config.batchSize || 1_000_000;
|
|
336
|
+
const numEvents = config.numEvents || 0;
|
|
337
|
+
|
|
338
|
+
if (batchSize < numEvents) {
|
|
339
|
+
throw new Error(
|
|
340
|
+
`Configuration error: writeToDisk is explicitly set to false but batchSize (${batchSize}) is lower than numEvents (${numEvents}). ` +
|
|
341
|
+
`This would result in data loss as batched data would be discarded. ` +
|
|
342
|
+
`Either set writeToDisk to true, increase batchSize to be >= numEvents, or provide a Mixpanel token to send data directly.`
|
|
343
|
+
);
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
}
|
|
315
347
|
}
|
package/lib/generators/events.js
CHANGED
|
@@ -66,13 +66,12 @@ export async function makeEvent(
|
|
|
66
66
|
// Create base event template
|
|
67
67
|
const eventTemplate = {
|
|
68
68
|
event: chosenEvent.event,
|
|
69
|
-
source: "dm4",
|
|
69
|
+
// source: "dm4",
|
|
70
70
|
time: "",
|
|
71
71
|
insert_id: "",
|
|
72
72
|
};
|
|
73
73
|
|
|
74
74
|
let defaultProps = {};
|
|
75
|
-
let devicePool = [];
|
|
76
75
|
|
|
77
76
|
// Add default properties based on configuration
|
|
78
77
|
if (hasLocation) {
|
|
@@ -82,32 +81,30 @@ export async function makeEvent(
|
|
|
82
81
|
if (hasBrowser) {
|
|
83
82
|
defaultProps.browser = u.choose(defaults.browsers());
|
|
84
83
|
}
|
|
85
|
-
|
|
86
|
-
// Build device pool based on enabled device types
|
|
87
|
-
if (hasAndroidDevices) devicePool.push(defaults.androidDevices());
|
|
88
|
-
if (hasIOSDevices) devicePool.push(defaults.iOSDevices());
|
|
89
|
-
if (hasDesktopDevices) devicePool.push(defaults.desktopDevices());
|
|
90
84
|
|
|
91
85
|
// Add campaigns with attribution likelihood
|
|
92
86
|
if (hasCampaigns && chance.bool({ likelihood: 25 })) {
|
|
93
87
|
defaultProps.campaigns = u.pickRandom(defaults.campaigns());
|
|
94
88
|
}
|
|
95
89
|
|
|
96
|
-
//
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
defaultProps.device = u.pickRandom(devices);
|
|
90
|
+
// PERFORMANCE: Use pre-computed device pool instead of rebuilding every time
|
|
91
|
+
if (defaults.allDevices.length) {
|
|
92
|
+
defaultProps.device = u.pickRandom(defaults.allDevices);
|
|
100
93
|
}
|
|
101
94
|
|
|
102
95
|
// Set event time using TimeSoup for realistic distribution
|
|
103
96
|
if (earliestTime) {
|
|
104
97
|
if (isFirstEvent) {
|
|
105
|
-
//
|
|
106
|
-
|
|
98
|
+
// PERFORMANCE: Direct numeric calculation instead of dayjs object creation
|
|
99
|
+
const shiftedTimestamp = earliestTime + context.TIME_SHIFT_SECONDS;
|
|
100
|
+
eventTemplate.time = dayjs.unix(shiftedTimestamp).toISOString();
|
|
107
101
|
} else {
|
|
108
|
-
// Get time from TimeSoup and apply precomputed time shift
|
|
102
|
+
// Get time from TimeSoup (returns ISO string) and apply precomputed time shift
|
|
109
103
|
const soupTime = u.TimeSoup(earliestTime, context.FIXED_NOW, peaks, deviation, mean);
|
|
110
|
-
|
|
104
|
+
// PERFORMANCE: Parse ISO directly to milliseconds, add shift, convert back to ISO with one dayjs call
|
|
105
|
+
const soupTimestamp = new Date(soupTime).getTime() / 1000; // Convert to unix seconds
|
|
106
|
+
const shiftedTimestamp = soupTimestamp + context.TIME_SHIFT_SECONDS;
|
|
107
|
+
eventTemplate.time = dayjs.unix(shiftedTimestamp).toISOString();
|
|
111
108
|
}
|
|
112
109
|
}
|
|
113
110
|
|
|
@@ -133,7 +130,9 @@ export async function makeEvent(
|
|
|
133
130
|
// PERFORMANCE: Process properties directly without creating intermediate object
|
|
134
131
|
// Add custom properties from event configuration
|
|
135
132
|
if (chosenEvent.properties) {
|
|
136
|
-
|
|
133
|
+
const eventKeys = Object.keys(chosenEvent.properties);
|
|
134
|
+
for (let i = 0; i < eventKeys.length; i++) {
|
|
135
|
+
const key = eventKeys[i];
|
|
137
136
|
try {
|
|
138
137
|
eventTemplate[key] = u.choose(chosenEvent.properties[key]);
|
|
139
138
|
} catch (e) {
|
|
@@ -145,7 +144,9 @@ export async function makeEvent(
|
|
|
145
144
|
|
|
146
145
|
// Add super properties (override event properties if needed)
|
|
147
146
|
if (superProps) {
|
|
148
|
-
|
|
147
|
+
const superKeys = Object.keys(superProps);
|
|
148
|
+
for (let i = 0; i < superKeys.length; i++) {
|
|
149
|
+
const key = superKeys[i];
|
|
149
150
|
try {
|
|
150
151
|
eventTemplate[key] = u.choose(superProps[key]);
|
|
151
152
|
} catch (e) {
|