make-mp-data 2.1.6 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -0
- package/dungeons/adspend.js +2 -2
- package/dungeons/ai-chat-analytics-ed.js +3 -2
- package/dungeons/anon.js +2 -2
- package/dungeons/array-of-object-loopup.js +181 -0
- package/dungeons/benchmark-heavy.js +241 -0
- package/dungeons/benchmark-light.js +141 -0
- package/dungeons/big.js +9 -8
- package/dungeons/business.js +2 -1
- package/dungeons/clinch-agi.js +632 -0
- package/dungeons/complex.js +3 -2
- package/dungeons/copilot.js +383 -0
- package/dungeons/ecommerce-store.js +0 -0
- package/dungeons/experiments.js +5 -4
- package/dungeons/foobar.js +101 -101
- package/dungeons/funnels.js +2 -2
- package/dungeons/gaming.js +3 -2
- package/dungeons/harness/harness-education.js +988 -0
- package/dungeons/harness/harness-fintech.js +976 -0
- package/dungeons/harness/harness-food.js +985 -0
- package/dungeons/harness/harness-gaming.js +1178 -0
- package/dungeons/harness/harness-media.js +961 -0
- package/dungeons/harness/harness-sass.js +923 -0
- package/dungeons/harness/harness-social.js +928 -0
- package/dungeons/kurby.js +211 -0
- package/dungeons/media.js +5 -4
- package/dungeons/mil.js +4 -3
- package/dungeons/mirror.js +2 -2
- package/dungeons/money2020-ed.js +8 -7
- package/dungeons/sanity.js +3 -2
- package/dungeons/scd.js +3 -2
- package/dungeons/simple.js +29 -14
- package/dungeons/strict-event-test.js +30 -0
- package/dungeons/student-teacher.js +3 -2
- package/dungeons/text-generation.js +84 -85
- package/dungeons/too-big-events.js +166 -0
- package/dungeons/uday-schema.json +220 -0
- package/dungeons/userAgent.js +4 -3
- package/index.js +41 -54
- package/lib/core/config-validator.js +122 -7
- package/lib/core/context.js +7 -14
- package/lib/core/storage.js +60 -30
- package/lib/generators/adspend.js +12 -27
- package/lib/generators/events.js +6 -7
- package/lib/generators/funnels.js +16 -5
- package/lib/generators/product-lookup.js +262 -0
- package/lib/generators/product-names.js +195 -0
- package/lib/generators/profiles.js +3 -3
- package/lib/generators/scd.js +13 -3
- package/lib/generators/text.js +17 -4
- package/lib/orchestrators/mixpanel-sender.js +251 -208
- package/lib/orchestrators/user-loop.js +57 -19
- package/lib/templates/funnels-instructions.txt +272 -0
- package/lib/templates/hook-examples.json +187 -0
- package/lib/templates/hooks-instructions.txt +295 -8
- package/lib/templates/phrases.js +473 -16
- package/lib/templates/refine-instructions.txt +485 -0
- package/lib/templates/schema-instructions.txt +239 -109
- package/lib/templates/schema.d.ts +173 -0
- package/lib/templates/verbose-schema.js +140 -206
- package/lib/utils/ai.js +853 -77
- package/lib/utils/chart.js +210 -0
- package/lib/utils/function-registry.js +285 -0
- package/lib/utils/json-evaluator.js +172 -0
- package/lib/utils/logger.js +38 -0
- package/lib/utils/mixpanel.js +101 -0
- package/lib/utils/project.js +3 -2
- package/lib/utils/utils.js +41 -4
- package/package.json +13 -19
- package/types.d.ts +15 -5
- package/lib/generators/text-bak-old.js +0 -1121
- package/lib/orchestrators/worker-manager.js +0 -203
- package/lib/templates/phrases-bak.js +0 -925
- package/lib/templates/prompt (old).txt +0 -98
- package/lib/templates/scratch-dungeon-template.js +0 -116
- package/lib/templates/textQuickTest.js +0 -172
|
@@ -11,7 +11,6 @@
|
|
|
11
11
|
import dayjs from "dayjs";
|
|
12
12
|
import { makeName } from "ak-tools";
|
|
13
13
|
import * as u from "../utils/utils.js";
|
|
14
|
-
import os from "os";
|
|
15
14
|
|
|
16
15
|
/**
|
|
17
16
|
* Infers funnels from the provided events
|
|
@@ -21,7 +20,10 @@ import os from "os";
|
|
|
21
20
|
function inferFunnels(events) {
|
|
22
21
|
const createdFunnels = [];
|
|
23
22
|
const firstEvents = events.filter((e) => e.isFirstEvent).map((e) => e.event);
|
|
24
|
-
const
|
|
23
|
+
const strictEvents = events.filter((e) => e.isStrictEvent).map((e) => e.event);
|
|
24
|
+
const usageEvents = events
|
|
25
|
+
.filter((e) => !e.isFirstEvent && !e.isStrictEvent)
|
|
26
|
+
.map((e) => e.event);
|
|
25
27
|
const numFunnelsToCreate = Math.ceil(usageEvents.length);
|
|
26
28
|
|
|
27
29
|
/** @type {import('../../types.js').Funnel} */
|
|
@@ -55,8 +57,8 @@ function inferFunnels(events) {
|
|
|
55
57
|
for (let i = 1; i < numFunnelsToCreate; i++) {
|
|
56
58
|
/** @type {import('../../types.js').Funnel} */
|
|
57
59
|
const funnel = { ...u.deepClone(funnelTemplate) };
|
|
58
|
-
funnel.conversionRate = u.integer(
|
|
59
|
-
funnel.timeToConvert = u.integer(
|
|
60
|
+
funnel.conversionRate = u.integer(10, 50);
|
|
61
|
+
funnel.timeToConvert = u.integer(24, 72);
|
|
60
62
|
funnel.weight = u.integer(1, 10);
|
|
61
63
|
const sequence = u.shuffleArray(usageEvents).slice(0, u.integer(2, usageEvents.length));
|
|
62
64
|
funnel.sequence = sequence;
|
|
@@ -75,6 +77,10 @@ function inferFunnels(events) {
|
|
|
75
77
|
export function validateDungeonConfig(config) {
|
|
76
78
|
const chance = u.getChance();
|
|
77
79
|
|
|
80
|
+
// Transform SCD props to regular props if credentials are missing
|
|
81
|
+
// This MUST happen BEFORE we extract values from the config
|
|
82
|
+
transformSCDPropsWithoutCredentials(config);
|
|
83
|
+
|
|
78
84
|
// Extract configuration with defaults
|
|
79
85
|
let {
|
|
80
86
|
seed,
|
|
@@ -101,6 +107,7 @@ export function validateDungeonConfig(config) {
|
|
|
101
107
|
region = "US",
|
|
102
108
|
writeToDisk = false,
|
|
103
109
|
verbose = true,
|
|
110
|
+
makeChart = false,
|
|
104
111
|
soup = {},
|
|
105
112
|
hook = (record) => record,
|
|
106
113
|
hasAdSpend = false,
|
|
@@ -115,7 +122,8 @@ export function validateDungeonConfig(config) {
|
|
|
115
122
|
alsoInferFunnels = false,
|
|
116
123
|
name = "",
|
|
117
124
|
batchSize = 2_500_000,
|
|
118
|
-
concurrency = 1
|
|
125
|
+
concurrency = 1,
|
|
126
|
+
strictEventCount = false
|
|
119
127
|
} = config;
|
|
120
128
|
|
|
121
129
|
// Allow concurrency override from config (default is now 1)
|
|
@@ -123,6 +131,12 @@ export function validateDungeonConfig(config) {
|
|
|
123
131
|
concurrency = 1;
|
|
124
132
|
}
|
|
125
133
|
|
|
134
|
+
// Force concurrency to 1 when strictEventCount is enabled
|
|
135
|
+
// This ensures the bailout check works correctly without race conditions
|
|
136
|
+
if (strictEventCount && concurrency !== 1) {
|
|
137
|
+
concurrency = 1;
|
|
138
|
+
}
|
|
139
|
+
|
|
126
140
|
// Ensure defaults for deep objects
|
|
127
141
|
if (!config.superProps) config.superProps = superProps;
|
|
128
142
|
if (!config.userProps || Object.keys(config?.userProps || {})) config.userProps = userProps;
|
|
@@ -140,6 +154,31 @@ export function validateDungeonConfig(config) {
|
|
|
140
154
|
name = makeName();
|
|
141
155
|
}
|
|
142
156
|
|
|
157
|
+
// Convert string hook to function
|
|
158
|
+
if (typeof hook === 'string') {
|
|
159
|
+
try {
|
|
160
|
+
// Use eval in a controlled manner to convert the string to a function
|
|
161
|
+
// The string should be: function(record, type, meta) { ... }
|
|
162
|
+
// eslint-disable-next-line no-eval
|
|
163
|
+
hook = eval(`(${hook})`);
|
|
164
|
+
|
|
165
|
+
// Validate it's actually a function
|
|
166
|
+
if (typeof hook !== 'function') {
|
|
167
|
+
throw new Error('Hook string did not evaluate to a function');
|
|
168
|
+
}
|
|
169
|
+
} catch (error) {
|
|
170
|
+
console.warn(`\u26a0\ufe0f Failed to convert hook string to function: ${error.message}`);
|
|
171
|
+
console.warn('Using default pass-through hook');
|
|
172
|
+
hook = (record) => record;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
// Ensure hook is a function
|
|
177
|
+
if (typeof hook !== 'function') {
|
|
178
|
+
console.warn('\u26a0\ufe0f Hook is not a function, using default pass-through hook');
|
|
179
|
+
hook = (record) => record;
|
|
180
|
+
}
|
|
181
|
+
|
|
143
182
|
// Validate events
|
|
144
183
|
if (!events || !events.length) events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }];
|
|
145
184
|
|
|
@@ -177,6 +216,8 @@ export function validateDungeonConfig(config) {
|
|
|
177
216
|
else {
|
|
178
217
|
evWeight = 1;
|
|
179
218
|
}
|
|
219
|
+
// Clamp weight to reasonable range (1-10) and ensure integer
|
|
220
|
+
evWeight = Math.max(1, Math.min(Math.floor(evWeight) || 1, 10));
|
|
180
221
|
return Array(evWeight).fill(event);
|
|
181
222
|
}));
|
|
182
223
|
|
|
@@ -241,7 +282,9 @@ export function validateDungeonConfig(config) {
|
|
|
241
282
|
hasAndroidDevices,
|
|
242
283
|
hasDesktopDevices,
|
|
243
284
|
hasIOSDevices,
|
|
244
|
-
name
|
|
285
|
+
name,
|
|
286
|
+
makeChart,
|
|
287
|
+
strictEventCount
|
|
245
288
|
};
|
|
246
289
|
|
|
247
290
|
return validatedConfig;
|
|
@@ -270,4 +313,76 @@ export function validateRequiredConfig(config) {
|
|
|
270
313
|
return true;
|
|
271
314
|
}
|
|
272
315
|
|
|
273
|
-
|
|
316
|
+
/**
|
|
317
|
+
* Transforms SCD properties to regular user/group properties when service account credentials are missing
|
|
318
|
+
* ONLY applies to UI jobs - programmatic usage always generates SCD files
|
|
319
|
+
* @param {Partial<Dungeon>} config - Configuration object
|
|
320
|
+
* @returns {void} Modifies config in place
|
|
321
|
+
*/
|
|
322
|
+
function transformSCDPropsWithoutCredentials(config) {
|
|
323
|
+
const { serviceAccount, projectId, serviceSecret, scdProps, isUIJob, token } = config;
|
|
324
|
+
|
|
325
|
+
// If no SCD props configured, nothing to validate
|
|
326
|
+
if (!scdProps || Object.keys(scdProps).length === 0) {
|
|
327
|
+
return;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
// If we have all credentials, SCD import can proceed
|
|
331
|
+
if (serviceAccount && projectId && serviceSecret) {
|
|
332
|
+
return;
|
|
333
|
+
}
|
|
334
|
+
|
|
335
|
+
// Missing credentials - handle based on job type
|
|
336
|
+
if (!isUIJob) {
|
|
337
|
+
// For programmatic/CLI usage, throw an error if trying to send SCDs to Mixpanel without credentials
|
|
338
|
+
if (token) {
|
|
339
|
+
throw new Error(
|
|
340
|
+
'Configuration error: SCD properties are configured but service credentials are missing.\n' +
|
|
341
|
+
'To import SCD data to Mixpanel, you must provide:\n' +
|
|
342
|
+
' - serviceAccount: Your Mixpanel service account username\n' +
|
|
343
|
+
' - serviceSecret: Your Mixpanel service account secret\n' +
|
|
344
|
+
' - projectId: Your Mixpanel project ID\n' +
|
|
345
|
+
'Without these credentials, SCD data cannot be imported to Mixpanel.'
|
|
346
|
+
);
|
|
347
|
+
}
|
|
348
|
+
// If not sending to Mixpanel (no token), allow generation for testing
|
|
349
|
+
return;
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
// UI job without credentials - convert SCD props to regular props
|
|
353
|
+
console.log('\u26a0\ufe0f Service account credentials missing - converting SCD properties to static properties');
|
|
354
|
+
|
|
355
|
+
// Ensure userProps and groupProps exist
|
|
356
|
+
if (!config.userProps) config.userProps = {};
|
|
357
|
+
if (!config.groupProps) config.groupProps = {};
|
|
358
|
+
|
|
359
|
+
// Process each SCD property
|
|
360
|
+
for (const [propKey, scdProp] of Object.entries(scdProps)) {
|
|
361
|
+
const { type = "user", values } = scdProp;
|
|
362
|
+
|
|
363
|
+
// Skip if no values
|
|
364
|
+
if (!values || JSON.stringify(values) === "{}" || JSON.stringify(values) === "[]") {
|
|
365
|
+
continue;
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
// Determine if this is a user or group property
|
|
369
|
+
if (type === "user") {
|
|
370
|
+
// Add to userProps
|
|
371
|
+
config.userProps[propKey] = values;
|
|
372
|
+
console.log(` \u2713 Converted user SCD property: ${propKey}`);
|
|
373
|
+
} else {
|
|
374
|
+
// Add to groupProps for the specific group type
|
|
375
|
+
if (!config.groupProps[type]) {
|
|
376
|
+
config.groupProps[type] = {};
|
|
377
|
+
}
|
|
378
|
+
config.groupProps[type][propKey] = values;
|
|
379
|
+
console.log(` \u2713 Converted group SCD property: ${propKey} (${type})`);
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
// Clear out scdProps since we've converted everything
|
|
384
|
+
config.scdProps = {};
|
|
385
|
+
console.log('\u2713 SCD properties converted to static properties\n');
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
export { inferFunnels, transformSCDPropsWithoutCredentials };
|
package/lib/core/context.js
CHANGED
|
@@ -73,8 +73,7 @@ function createRuntimeState() {
|
|
|
73
73
|
eventCount: 0,
|
|
74
74
|
userCount: 0,
|
|
75
75
|
isBatchMode: false,
|
|
76
|
-
verbose: false
|
|
77
|
-
isCLI: false
|
|
76
|
+
verbose: false
|
|
78
77
|
};
|
|
79
78
|
}
|
|
80
79
|
|
|
@@ -82,10 +81,10 @@ function createRuntimeState() {
|
|
|
82
81
|
* Context factory that creates a complete context object for data generation
|
|
83
82
|
* @param {Dungeon} config - Validated configuration object
|
|
84
83
|
* @param {Storage|null} storage - Storage containers (optional, can be set later)
|
|
85
|
-
* @param {boolean} [
|
|
84
|
+
* @param {boolean} [_unusedCliMode] - Deprecated parameter (no longer used)
|
|
86
85
|
* @returns {Context} Context object containing all state and dependencies
|
|
87
86
|
*/
|
|
88
|
-
export function createContext(config, storage = null,
|
|
87
|
+
export function createContext(config, storage = null, _unusedCliMode = null) {
|
|
89
88
|
// Import campaign data (could be made configurable)
|
|
90
89
|
const campaignData = campaigns;
|
|
91
90
|
|
|
@@ -98,8 +97,6 @@ export function createContext(config, storage = null, isCliMode = null) {
|
|
|
98
97
|
// Set runtime flags from config
|
|
99
98
|
runtime.verbose = config.verbose || false;
|
|
100
99
|
runtime.isBatchMode = config.batchSize && config.batchSize < config.numEvents;
|
|
101
|
-
runtime.isCLI = isCliMode !== null ? isCliMode : (process.argv[1]?.endsWith('index.js') || process.argv[1]?.endsWith('entry.js') || false);
|
|
102
|
-
if (runtime.isCLI) runtime.verbose = true; // Always verbose in CLI mode
|
|
103
100
|
|
|
104
101
|
const context = {
|
|
105
102
|
config,
|
|
@@ -150,28 +147,24 @@ export function createContext(config, storage = null, isCliMode = null) {
|
|
|
150
147
|
return runtime.isBatchMode;
|
|
151
148
|
},
|
|
152
149
|
|
|
153
|
-
isCLI() {
|
|
154
|
-
return runtime.isCLI;
|
|
155
|
-
},
|
|
156
|
-
|
|
157
150
|
// Time helper methods
|
|
158
151
|
getTimeShift() {
|
|
159
|
-
const actualNow = dayjs().add(
|
|
152
|
+
const actualNow = dayjs().add(1, "day");
|
|
160
153
|
return actualNow.diff(dayjs.unix(this.FIXED_NOW), "seconds");
|
|
161
154
|
},
|
|
162
155
|
|
|
163
156
|
getDaysShift() {
|
|
164
|
-
const actualNow = dayjs().add(
|
|
157
|
+
const actualNow = dayjs().add(1, "day");
|
|
165
158
|
return actualNow.diff(dayjs.unix(this.FIXED_NOW), "days");
|
|
166
159
|
},
|
|
167
160
|
|
|
168
161
|
// Time constants (previously globals)
|
|
169
162
|
FIXED_NOW: global.FIXED_NOW,
|
|
170
163
|
FIXED_BEGIN: global.FIXED_BEGIN,
|
|
171
|
-
|
|
164
|
+
|
|
172
165
|
// PERFORMANCE: Pre-calculated time shift (instead of calculating per-event)
|
|
173
166
|
TIME_SHIFT_SECONDS: (() => {
|
|
174
|
-
const actualNow = dayjs().add(
|
|
167
|
+
const actualNow = dayjs().add(1, "day");
|
|
175
168
|
return actualNow.diff(dayjs.unix(global.FIXED_NOW), "seconds");
|
|
176
169
|
})(),
|
|
177
170
|
};
|
package/lib/core/storage.js
CHANGED
|
@@ -13,6 +13,7 @@ import pLimit from 'p-limit';
|
|
|
13
13
|
import os from "os";
|
|
14
14
|
import path from "path";
|
|
15
15
|
import * as u from "../utils/utils.js";
|
|
16
|
+
import { dataLogger as logger } from "../utils/logger.js";
|
|
16
17
|
|
|
17
18
|
/**
|
|
18
19
|
* Creates a hooked array that transforms data on push and handles batching/disk writes
|
|
@@ -32,16 +33,15 @@ export async function createHookArray(arr = [], opts) {
|
|
|
32
33
|
} = opts || {};
|
|
33
34
|
|
|
34
35
|
const FILE_CONN = pLimit(concurrency);
|
|
35
|
-
const {
|
|
36
|
-
config = {},
|
|
37
|
-
runtime = {
|
|
38
|
-
operations: 0,
|
|
39
|
-
eventCount: 0,
|
|
40
|
-
userCount: 0,
|
|
41
|
-
isBatchMode: false,
|
|
42
|
-
verbose: false
|
|
43
|
-
|
|
44
|
-
}
|
|
36
|
+
const {
|
|
37
|
+
config = {},
|
|
38
|
+
runtime = {
|
|
39
|
+
operations: 0,
|
|
40
|
+
eventCount: 0,
|
|
41
|
+
userCount: 0,
|
|
42
|
+
isBatchMode: false,
|
|
43
|
+
verbose: false
|
|
44
|
+
}
|
|
45
45
|
} = context;
|
|
46
46
|
const BATCH_SIZE = config.batchSize || 1_000_000;
|
|
47
47
|
const NODE_ENV = process.env.NODE_ENV || "unknown";
|
|
@@ -65,7 +65,7 @@ export async function createHookArray(arr = [], opts) {
|
|
|
65
65
|
}
|
|
66
66
|
|
|
67
67
|
function getWritePath() {
|
|
68
|
-
const gzipSuffix = (config.gzip
|
|
68
|
+
const gzipSuffix = (config.gzip) ? '.gz' : '';
|
|
69
69
|
|
|
70
70
|
if (isBatchMode) {
|
|
71
71
|
if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}-part-${batch.toString()}.${format}${gzipSuffix}`;
|
|
@@ -78,22 +78,27 @@ export async function createHookArray(arr = [], opts) {
|
|
|
78
78
|
}
|
|
79
79
|
|
|
80
80
|
function getWriteDir() {
|
|
81
|
-
return
|
|
81
|
+
return writeDir;
|
|
82
82
|
}
|
|
83
83
|
|
|
84
84
|
async function transformThenPush(item, meta) {
|
|
85
85
|
if (item === null || item === undefined) return false;
|
|
86
86
|
if (typeof item === 'object' && Object.keys(item).length === 0) return false;
|
|
87
87
|
|
|
88
|
+
// Skip hook for types already hooked in generators/orchestrators to prevent double-firing
|
|
89
|
+
// Types hooked upstream: "event" (events.js), "user" (user-loop.js), "scd" (user-loop.js)
|
|
90
|
+
// Types only hooked here: "mirror", "ad-spend", "group", "lookup"
|
|
91
|
+
const alreadyHooked = type === "event" || type === "user" || type === "scd";
|
|
92
|
+
|
|
88
93
|
// Performance optimization: skip hook overhead for passthrough hooks
|
|
89
|
-
|
|
94
|
+
// Only treat as passthrough if the function body is trivially simple (just returns its argument)
|
|
95
|
+
const hookStr = hook.toString();
|
|
96
|
+
const isPassthroughHook = hook.length === 1 || /^\s*function\s*\([^)]*\)\s*\{\s*return\s+\w+;?\s*\}\s*$/.test(hookStr) || /^\s*\(?[^)]*\)?\s*=>\s*\w+\s*$/.test(hookStr);
|
|
90
97
|
|
|
91
|
-
if (isPassthroughHook) {
|
|
98
|
+
if (alreadyHooked || isPassthroughHook) {
|
|
92
99
|
// Fast path for passthrough hooks - no transformation needed
|
|
93
100
|
if (Array.isArray(item)) {
|
|
94
|
-
|
|
95
|
-
arr.push(item[i]);
|
|
96
|
-
}
|
|
101
|
+
arr.push(...item);
|
|
97
102
|
} else {
|
|
98
103
|
arr.push(item);
|
|
99
104
|
}
|
|
@@ -101,25 +106,43 @@ export async function createHookArray(arr = [], opts) {
|
|
|
101
106
|
// Slow path for actual transformation hooks
|
|
102
107
|
const allMetaData = { ...rest, ...meta };
|
|
103
108
|
|
|
109
|
+
// Helper to validate events have required properties
|
|
110
|
+
// Note: event-type hooks are handled in the fast path (alreadyHooked),
|
|
111
|
+
// so this only runs for storage-only hook types (mirror, ad-spend, group, lookup)
|
|
112
|
+
const isValidEvent = (e) => {
|
|
113
|
+
if (!e || typeof e !== 'object') return false;
|
|
114
|
+
return true;
|
|
115
|
+
};
|
|
116
|
+
|
|
104
117
|
if (Array.isArray(item)) {
|
|
105
118
|
for (const i of item) {
|
|
106
119
|
try {
|
|
107
120
|
const enriched = await hook(i, type, allMetaData);
|
|
108
|
-
if (Array.isArray(enriched))
|
|
109
|
-
|
|
121
|
+
if (Array.isArray(enriched)) {
|
|
122
|
+
enriched.forEach(e => {
|
|
123
|
+
if (isValidEvent(e)) arr.push(e);
|
|
124
|
+
});
|
|
125
|
+
} else if (isValidEvent(enriched)) {
|
|
126
|
+
arr.push(enriched);
|
|
127
|
+
}
|
|
110
128
|
} catch (e) {
|
|
111
|
-
|
|
112
|
-
arr.push(i);
|
|
129
|
+
logger.error({ err: e }, 'Hook error during batch processing');
|
|
130
|
+
if (isValidEvent(i)) arr.push(i);
|
|
113
131
|
}
|
|
114
132
|
}
|
|
115
133
|
} else {
|
|
116
134
|
try {
|
|
117
135
|
const enriched = await hook(item, type, allMetaData);
|
|
118
|
-
if (Array.isArray(enriched))
|
|
119
|
-
|
|
136
|
+
if (Array.isArray(enriched)) {
|
|
137
|
+
enriched.forEach(e => {
|
|
138
|
+
if (isValidEvent(e)) arr.push(e);
|
|
139
|
+
});
|
|
140
|
+
} else if (isValidEvent(enriched)) {
|
|
141
|
+
arr.push(enriched);
|
|
142
|
+
}
|
|
120
143
|
} catch (e) {
|
|
121
|
-
|
|
122
|
-
arr.push(item);
|
|
144
|
+
logger.error({ err: e }, 'Hook error during single item processing');
|
|
145
|
+
if (isValidEvent(item)) arr.push(item);
|
|
123
146
|
}
|
|
124
147
|
}
|
|
125
148
|
}
|
|
@@ -134,11 +157,11 @@ export async function createHookArray(arr = [], opts) {
|
|
|
134
157
|
|
|
135
158
|
try {
|
|
136
159
|
// Create a copy of the data to write
|
|
137
|
-
const dataToWrite = arr
|
|
160
|
+
const dataToWrite = [...arr];
|
|
138
161
|
// Clear the array immediately to prevent race conditions
|
|
139
162
|
arr.length = 0;
|
|
140
|
-
|
|
141
|
-
// Write to disk
|
|
163
|
+
|
|
164
|
+
// Write to disk/cloud - always blocking to prevent OOM
|
|
142
165
|
const writeResult = await FILE_CONN(() => writeToDisk(dataToWrite, { writePath }));
|
|
143
166
|
return writeResult;
|
|
144
167
|
} finally {
|
|
@@ -153,8 +176,11 @@ export async function createHookArray(arr = [], opts) {
|
|
|
153
176
|
const { writePath } = options;
|
|
154
177
|
let writeResult;
|
|
155
178
|
|
|
156
|
-
|
|
179
|
+
const isDev = process.env.NODE_ENV !== 'production';
|
|
180
|
+
if (config.verbose && isDev) {
|
|
157
181
|
console.log(`\n\twriting ${writePath}\n`);
|
|
182
|
+
} else if (config.verbose) {
|
|
183
|
+
logger.info({ path: writePath }, `Writing ${writePath}`);
|
|
158
184
|
}
|
|
159
185
|
|
|
160
186
|
const streamOptions = {
|
|
@@ -190,7 +216,7 @@ export async function createHookArray(arr = [], opts) {
|
|
|
190
216
|
try {
|
|
191
217
|
batch++;
|
|
192
218
|
const writePath = getWritePath();
|
|
193
|
-
const dataToWrite = arr
|
|
219
|
+
const dataToWrite = [...arr];
|
|
194
220
|
arr.length = 0; // Clear array after copying data
|
|
195
221
|
await FILE_CONN(() => writeToDisk(dataToWrite, { writePath }));
|
|
196
222
|
} finally {
|
|
@@ -279,6 +305,7 @@ export class StorageManager {
|
|
|
279
305
|
// Initialize SCD tables if configured
|
|
280
306
|
if (config.scdProps && Object.keys(config.scdProps).length > 0) {
|
|
281
307
|
for (const scdKey of Object.keys(config.scdProps)) {
|
|
308
|
+
const scdConfig = config.scdProps[scdKey];
|
|
282
309
|
const scdArray = await createHookArray([], {
|
|
283
310
|
hook: config.hook,
|
|
284
311
|
type: "scd",
|
|
@@ -288,6 +315,9 @@ export class StorageManager {
|
|
|
288
315
|
context: this.context
|
|
289
316
|
});
|
|
290
317
|
scdArray.scdKey = scdKey;
|
|
318
|
+
// Store entity type (user or group) from config
|
|
319
|
+
const entityType = (typeof scdConfig === 'object' && scdConfig.type) ? scdConfig.type : 'user';
|
|
320
|
+
scdArray.entityType = entityType;
|
|
291
321
|
storage.scdTableData.push(scdArray);
|
|
292
322
|
}
|
|
293
323
|
}
|
|
@@ -29,15 +29,6 @@ export async function makeAdSpend(context, day, campaigns = null) {
|
|
|
29
29
|
const chance = u.getChance();
|
|
30
30
|
const adSpendEvents = [];
|
|
31
31
|
|
|
32
|
-
// Determine if we should apply time shift
|
|
33
|
-
// Only shift if the date is in the historical "fixed" range (before 2025)
|
|
34
|
-
const dayTimestamp = dayjs(day).unix();
|
|
35
|
-
const CUTOFF_DATE = dayjs('2025-01-01').unix(); // Dates before 2025 are considered "fixed" range
|
|
36
|
-
const shouldShift = dayTimestamp < CUTOFF_DATE;
|
|
37
|
-
|
|
38
|
-
// Get time shift from context, but only use it if we should shift
|
|
39
|
-
const timeShiftSeconds = shouldShift ? (context.TIME_SHIFT_SECONDS || 0) : 0;
|
|
40
|
-
|
|
41
32
|
for (const network of campaignConfigs) {
|
|
42
33
|
const networkCampaigns = network.utm_campaign;
|
|
43
34
|
|
|
@@ -45,8 +36,8 @@ export async function makeAdSpend(context, day, campaigns = null) {
|
|
|
45
36
|
// Skip organic campaigns
|
|
46
37
|
if (campaign === "$organic") continue;
|
|
47
38
|
|
|
48
|
-
// Generate realistic ad spend metrics
|
|
49
|
-
const adSpendEvent = createAdSpendEvent(network, campaign, day, chance
|
|
39
|
+
// Generate realistic ad spend metrics
|
|
40
|
+
const adSpendEvent = createAdSpendEvent(network, campaign, day, chance);
|
|
50
41
|
adSpendEvents.push(adSpendEvent);
|
|
51
42
|
}
|
|
52
43
|
}
|
|
@@ -60,15 +51,9 @@ export async function makeAdSpend(context, day, campaigns = null) {
|
|
|
60
51
|
* @param {string} campaign - Campaign name
|
|
61
52
|
* @param {string} day - ISO date string
|
|
62
53
|
* @param {Object} chance - Chance.js instance
|
|
63
|
-
* @param {number} timeShiftSeconds - Time shift in seconds to apply to timestamps (default: 0)
|
|
64
54
|
* @returns {Object} Ad spend event object
|
|
65
55
|
*/
|
|
66
|
-
function createAdSpendEvent(network, campaign, day, chance
|
|
67
|
-
// Apply time shift to the day timestamp
|
|
68
|
-
const dayTimestamp = dayjs(day).unix();
|
|
69
|
-
const shiftedTimestamp = dayTimestamp + timeShiftSeconds;
|
|
70
|
-
const shiftedDay = dayjs.unix(shiftedTimestamp).toISOString();
|
|
71
|
-
|
|
56
|
+
function createAdSpendEvent(network, campaign, day, chance) {
|
|
72
57
|
// Generate realistic cost
|
|
73
58
|
const cost = chance.floating({ min: 10, max: 250, fixed: 2 });
|
|
74
59
|
|
|
@@ -92,7 +77,7 @@ function createAdSpendEvent(network, campaign, day, chance, timeShiftSeconds = 0
|
|
|
92
77
|
|
|
93
78
|
return {
|
|
94
79
|
event: "$ad_spend",
|
|
95
|
-
time:
|
|
80
|
+
time: day,
|
|
96
81
|
// source: 'dm4',
|
|
97
82
|
utm_campaign: campaign,
|
|
98
83
|
campaign_id: id,
|
|
@@ -107,7 +92,7 @@ function createAdSpendEvent(network, campaign, day, chance, timeShiftSeconds = 0
|
|
|
107
92
|
views,
|
|
108
93
|
impressions,
|
|
109
94
|
cost,
|
|
110
|
-
date: dayjs
|
|
95
|
+
date: dayjs(day).format("YYYY-MM-DD"),
|
|
111
96
|
};
|
|
112
97
|
}
|
|
113
98
|
|
|
@@ -120,28 +105,28 @@ export function validateCampaigns(campaigns) {
|
|
|
120
105
|
if (!Array.isArray(campaigns)) {
|
|
121
106
|
throw new Error("Campaigns must be an array");
|
|
122
107
|
}
|
|
123
|
-
|
|
108
|
+
|
|
124
109
|
for (const network of campaigns) {
|
|
125
110
|
if (!network.utm_source || !Array.isArray(network.utm_source)) {
|
|
126
111
|
throw new Error("Each campaign network must have utm_source array");
|
|
127
112
|
}
|
|
128
|
-
|
|
113
|
+
|
|
129
114
|
if (!network.utm_campaign || !Array.isArray(network.utm_campaign)) {
|
|
130
115
|
throw new Error("Each campaign network must have utm_campaign array");
|
|
131
116
|
}
|
|
132
|
-
|
|
117
|
+
|
|
133
118
|
if (!network.utm_medium || !Array.isArray(network.utm_medium)) {
|
|
134
119
|
throw new Error("Each campaign network must have utm_medium array");
|
|
135
120
|
}
|
|
136
|
-
|
|
121
|
+
|
|
137
122
|
if (!network.utm_content || !Array.isArray(network.utm_content)) {
|
|
138
123
|
throw new Error("Each campaign network must have utm_content array");
|
|
139
124
|
}
|
|
140
|
-
|
|
125
|
+
|
|
141
126
|
if (!network.utm_term || !Array.isArray(network.utm_term)) {
|
|
142
127
|
throw new Error("Each campaign network must have utm_term array");
|
|
143
128
|
}
|
|
144
129
|
}
|
|
145
|
-
|
|
130
|
+
|
|
146
131
|
return true;
|
|
147
|
-
}
|
|
132
|
+
}
|
package/lib/generators/events.js
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
import dayjs from "dayjs";
|
|
13
13
|
import * as u from "../utils/utils.js";
|
|
14
|
+
import { dataLogger as logger } from "../utils/logger.js";
|
|
14
15
|
|
|
15
16
|
/**
|
|
16
17
|
* Creates a Mixpanel event with a flat shape
|
|
@@ -136,7 +137,7 @@ export async function makeEvent(
|
|
|
136
137
|
try {
|
|
137
138
|
eventTemplate[key] = u.choose(chosenEvent.properties[key]);
|
|
138
139
|
} catch (e) {
|
|
139
|
-
|
|
140
|
+
logger.error({ err: e, key, event: chosenEvent.event }, `Error processing property ${key} in ${chosenEvent.event} event`);
|
|
140
141
|
// Continue processing other properties
|
|
141
142
|
}
|
|
142
143
|
}
|
|
@@ -150,7 +151,7 @@ export async function makeEvent(
|
|
|
150
151
|
try {
|
|
151
152
|
eventTemplate[key] = u.choose(superProps[key]);
|
|
152
153
|
} catch (e) {
|
|
153
|
-
|
|
154
|
+
logger.error({ err: e, key }, `Error processing super property ${key}`);
|
|
154
155
|
// Continue processing other properties
|
|
155
156
|
}
|
|
156
157
|
}
|
|
@@ -168,8 +169,6 @@ export async function makeEvent(
|
|
|
168
169
|
const distinctId = eventTemplate.user_id || eventTemplate.device_id || eventTemplate.distinct_id || distinct_id;
|
|
169
170
|
const tuple = `${eventTemplate.event}-${eventTemplate.time}-${distinctId}`;
|
|
170
171
|
eventTemplate.insert_id = u.quickHash(tuple);
|
|
171
|
-
//v2 compat
|
|
172
|
-
eventTemplate.distinct_id = distinctId;
|
|
173
172
|
|
|
174
173
|
// Call hook if configured (before returning the event)
|
|
175
174
|
const { hook } = config;
|
|
@@ -258,12 +257,12 @@ function addGroupProperties(eventTemplate, groupKeys) {
|
|
|
258
257
|
|
|
259
258
|
// Empty array for group events means all events get the group property
|
|
260
259
|
if (!groupEvents.length) {
|
|
261
|
-
eventTemplate[groupKey] = u.pick(u.weighNumRange(1, groupCardinality));
|
|
260
|
+
eventTemplate[groupKey] = String(u.pick(u.weighNumRange(1, groupCardinality)));
|
|
262
261
|
}
|
|
263
|
-
|
|
262
|
+
|
|
264
263
|
// Only add group property if event is in the specified group events
|
|
265
264
|
if (groupEvents.includes(eventTemplate.event)) {
|
|
266
|
-
eventTemplate[groupKey] = u.pick(u.weighNumRange(1, groupCardinality));
|
|
265
|
+
eventTemplate[groupKey] = String(u.pick(u.weighNumRange(1, groupCardinality)));
|
|
267
266
|
}
|
|
268
267
|
}
|
|
269
268
|
}
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
import dayjs from "dayjs";
|
|
9
9
|
import * as u from "../utils/utils.js";
|
|
10
10
|
import { makeEvent } from "./events.js";
|
|
11
|
+
import { dataLogger as logger } from "../utils/logger.js";
|
|
11
12
|
|
|
12
13
|
/**
|
|
13
14
|
* Creates a funnel (sequence of events) for a user with conversion logic
|
|
@@ -78,6 +79,7 @@ export async function makeFunnel(context, funnel, user, firstEventTime, profile
|
|
|
78
79
|
requireRepeats = false,
|
|
79
80
|
_experimentName: expName,
|
|
80
81
|
_experimentVariant: expVariant,
|
|
82
|
+
bindPropsIndex = 0
|
|
81
83
|
} = funnel;
|
|
82
84
|
|
|
83
85
|
const { distinct_id, created, anonymousIds = [], sessionIds = [] } = user;
|
|
@@ -89,12 +91,12 @@ export async function makeFunnel(context, funnel, user, firstEventTime, profile
|
|
|
89
91
|
try {
|
|
90
92
|
chosenFunnelProps[key] = u.choose(chosenFunnelProps[key]);
|
|
91
93
|
} catch (e) {
|
|
92
|
-
|
|
94
|
+
logger.error({ err: e, key, funnel: funnel.sequence.join(" > ") }, `Error processing property ${key} in funnel`);
|
|
93
95
|
}
|
|
94
96
|
}
|
|
95
97
|
|
|
96
98
|
// Build event specifications for funnel steps
|
|
97
|
-
const funnelPossibleEvents = buildFunnelEvents(context, sequence, chosenFunnelProps, expName, expVariant);
|
|
99
|
+
const funnelPossibleEvents = buildFunnelEvents(context, sequence, chosenFunnelProps, bindPropsIndex, expName, expVariant);
|
|
98
100
|
|
|
99
101
|
// Handle repeat logic and conversion rate adjustment
|
|
100
102
|
const { processedEvents, adjustedConversionRate } = processEventRepeats(
|
|
@@ -158,14 +160,15 @@ export async function makeFunnel(context, funnel, user, firstEventTime, profile
|
|
|
158
160
|
* @param {Context} context - Context object
|
|
159
161
|
* @param {Array} sequence - Array of event names
|
|
160
162
|
* @param {Object} chosenFunnelProps - Properties to apply to all events
|
|
163
|
+
* @param {number} bindPropsIndex - Index at which to bind properties (if applicable)
|
|
161
164
|
* @param {string} [experimentName] - Name of experiment (if experiment is enabled)
|
|
162
165
|
* @param {string} [experimentVariant] - Variant name (A, B, or C)
|
|
163
166
|
* @returns {Array} Array of event specifications
|
|
164
167
|
*/
|
|
165
|
-
function buildFunnelEvents(context, sequence, chosenFunnelProps, experimentName, experimentVariant) {
|
|
168
|
+
function buildFunnelEvents(context, sequence, chosenFunnelProps, bindPropsIndex, experimentName, experimentVariant) {
|
|
166
169
|
const { config } = context;
|
|
167
170
|
|
|
168
|
-
return sequence.map((eventName) => {
|
|
171
|
+
return sequence.map((eventName, currentIndex) => {
|
|
169
172
|
// Handle $experiment_started event specially
|
|
170
173
|
if (eventName === "$experiment_started" && experimentName && experimentVariant) {
|
|
171
174
|
return {
|
|
@@ -191,13 +194,21 @@ function buildFunnelEvents(context, sequence, chosenFunnelProps, experimentName,
|
|
|
191
194
|
try {
|
|
192
195
|
eventSpec.properties[key] = u.choose(eventSpec.properties[key]);
|
|
193
196
|
} catch (e) {
|
|
194
|
-
|
|
197
|
+
logger.error({ err: e, key, event: eventSpec.event }, `Error processing property ${key} in ${eventSpec.event} event`);
|
|
195
198
|
}
|
|
196
199
|
}
|
|
197
200
|
|
|
198
201
|
// Merge funnel properties (no need to delete properties since we're creating a new object)
|
|
199
202
|
eventSpec.properties = { ...eventSpec.properties, ...chosenFunnelProps };
|
|
200
203
|
|
|
204
|
+
|
|
205
|
+
if (bindPropsIndex && currentIndex < bindPropsIndex) {
|
|
206
|
+
// Remove funnel properties that were added but should not be bound yet
|
|
207
|
+
for (const key in chosenFunnelProps) {
|
|
208
|
+
delete eventSpec.properties[key];
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
|
|
201
212
|
return eventSpec;
|
|
202
213
|
});
|
|
203
214
|
}
|