make-mp-data 2.0.19 → 2.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dungeons/big.js +7 -6
- package/dungeons/business.js +21 -3
- package/dungeons/experiments.js +8 -7
- package/dungeons/media.js +7 -7
- package/dungeons/sanity.js +8 -14
- package/dungeons/simple.js +1 -0
- package/dungeons/student-teacher.js +426 -0
- package/dungeons/userAgent.js +7 -7
- package/entry.js +19 -3
- package/index.js +107 -7
- package/lib/cli/cli.js +8 -0
- package/lib/core/config-validator.js +244 -218
- package/lib/core/context.js +31 -16
- package/lib/core/storage.js +61 -27
- package/lib/generators/events.js +41 -18
- package/lib/orchestrators/mixpanel-sender.js +5 -2
- package/lib/orchestrators/user-loop.js +212 -181
- package/lib/orchestrators/worker-manager.js +5 -2
- package/lib/templates/abbreviated.d.ts +159 -0
- package/lib/{data → templates}/defaults.js +2 -2
- package/lib/templates/instructions.txt +78 -0
- package/lib/templates/scratch-dungeon-template.js +116 -0
- package/lib/templates/verbose-schema.js +338 -0
- package/lib/utils/ai.js +42 -64
- package/lib/utils/chart.js +5 -0
- package/lib/utils/utils.js +116 -55
- package/package.json +9 -10
- package/types.d.ts +138 -125
- package/lib/cloud-function.js +0 -20
- package/lib/{utils/prompt.txt → templates/prompt (old).txt} +0 -0
package/lib/core/storage.js
CHANGED
|
@@ -3,7 +3,10 @@
|
|
|
3
3
|
* Extracted from index.js to eliminate global dependencies
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
-
/** @typedef {import('../../types').Context} Context */
|
|
6
|
+
/** @typedef {import('../../types.js').Context} Context */
|
|
7
|
+
/** @typedef {import('../../types.js').HookedArray<any>} HookedArray */
|
|
8
|
+
/** @typedef {import('../../types.js').Storage} Storage */
|
|
9
|
+
/** @typedef {import('../../types.js').hookArrayOptions<any>} hookArrayOptions */
|
|
7
10
|
|
|
8
11
|
import { existsSync } from "fs";
|
|
9
12
|
import pLimit from 'p-limit';
|
|
@@ -14,34 +17,39 @@ import * as u from "../utils/utils.js";
|
|
|
14
17
|
/**
|
|
15
18
|
* Creates a hooked array that transforms data on push and handles batching/disk writes
|
|
16
19
|
* @param {Array} arr - Base array to enhance
|
|
17
|
-
* @param {
|
|
18
|
-
* @
|
|
19
|
-
* @param {string} opts.type - Type identifier for the hook function
|
|
20
|
-
* @param {string} opts.filepath - Base filename for disk writes
|
|
21
|
-
* @param {string} opts.format - Output format ('csv' or 'json')
|
|
22
|
-
* @param {number} opts.concurrency - Max concurrent file operations
|
|
23
|
-
* @param {Context} opts.context - Context object with config, batchSize, etc.
|
|
24
|
-
* @returns {Promise<Array>} Enhanced array with hookPush and flush methods
|
|
20
|
+
* @param {hookArrayOptions} opts - Configuration options
|
|
21
|
+
* @returns {Promise<HookedArray>} Enhanced array with hookPush and flush methods
|
|
25
22
|
*/
|
|
26
|
-
export async function createHookArray(arr = [], opts = {}) {
|
|
23
|
+
export async function createHookArray(arr = [], opts) {
|
|
27
24
|
const {
|
|
28
25
|
hook = a => a,
|
|
29
26
|
type = "",
|
|
30
27
|
filepath = "./defaultFile",
|
|
31
28
|
format = "csv",
|
|
32
29
|
concurrency = 1,
|
|
33
|
-
context = {},
|
|
30
|
+
context = /** @type {Context} */ ({}),
|
|
34
31
|
...rest
|
|
35
|
-
} = opts;
|
|
32
|
+
} = opts || {};
|
|
36
33
|
|
|
37
34
|
const FILE_CONN = pLimit(concurrency);
|
|
38
|
-
const {
|
|
35
|
+
const {
|
|
36
|
+
config = {},
|
|
37
|
+
runtime = {
|
|
38
|
+
operations: 0,
|
|
39
|
+
eventCount: 0,
|
|
40
|
+
userCount: 0,
|
|
41
|
+
isBatchMode: false,
|
|
42
|
+
verbose: false,
|
|
43
|
+
isCLI: false
|
|
44
|
+
}
|
|
45
|
+
} = context;
|
|
39
46
|
const BATCH_SIZE = config.batchSize || 1_000_000;
|
|
40
47
|
const NODE_ENV = process.env.NODE_ENV || "unknown";
|
|
41
48
|
|
|
42
49
|
let batch = 0;
|
|
43
50
|
let writeDir;
|
|
44
51
|
let isBatchMode = runtime.isBatchMode || false;
|
|
52
|
+
let isWriting = false; // Prevent concurrent writes
|
|
45
53
|
|
|
46
54
|
// Determine write directory
|
|
47
55
|
const dataFolder = path.resolve("./data");
|
|
@@ -77,7 +85,7 @@ export async function createHookArray(arr = [], opts = {}) {
|
|
|
77
85
|
|
|
78
86
|
// Performance optimization: skip hook overhead for passthrough hooks
|
|
79
87
|
const isPassthroughHook = hook.toString().includes('return record') || hook.length === 1;
|
|
80
|
-
|
|
88
|
+
|
|
81
89
|
if (isPassthroughHook) {
|
|
82
90
|
// Fast path for passthrough hooks - no transformation needed
|
|
83
91
|
if (Array.isArray(item)) {
|
|
@@ -112,13 +120,26 @@ export async function createHookArray(arr = [], opts = {}) {
|
|
|
112
120
|
}
|
|
113
121
|
}
|
|
114
122
|
|
|
115
|
-
|
|
123
|
+
// Check batch size and handle writes synchronously to prevent race conditions
|
|
124
|
+
if (arr.length > BATCH_SIZE && !isWriting) {
|
|
125
|
+
isWriting = true; // Lock to prevent concurrent writes
|
|
116
126
|
isBatchMode = true;
|
|
117
127
|
runtime.isBatchMode = true; // Update runtime state
|
|
118
128
|
batch++;
|
|
119
129
|
const writePath = getWritePath();
|
|
120
|
-
|
|
121
|
-
|
|
130
|
+
|
|
131
|
+
try {
|
|
132
|
+
// Create a copy of the data to write
|
|
133
|
+
const dataToWrite = [...arr];
|
|
134
|
+
// Clear the array immediately to prevent race conditions
|
|
135
|
+
arr.length = 0;
|
|
136
|
+
|
|
137
|
+
// Write to disk (this is now synchronous from the perspective of batch management)
|
|
138
|
+
const writeResult = await FILE_CONN(() => writeToDisk(dataToWrite, { writePath }));
|
|
139
|
+
return writeResult;
|
|
140
|
+
} finally {
|
|
141
|
+
isWriting = false; // Release the lock
|
|
142
|
+
}
|
|
122
143
|
} else {
|
|
123
144
|
return Promise.resolve(false);
|
|
124
145
|
}
|
|
@@ -129,7 +150,7 @@ export async function createHookArray(arr = [], opts = {}) {
|
|
|
129
150
|
let writeResult;
|
|
130
151
|
|
|
131
152
|
if (config.verbose) {
|
|
132
|
-
console.log(`\n\
|
|
153
|
+
console.log(`\n\twriting ${writePath}\n`);
|
|
133
154
|
}
|
|
134
155
|
|
|
135
156
|
switch (format) {
|
|
@@ -143,21 +164,33 @@ export async function createHookArray(arr = [], opts = {}) {
|
|
|
143
164
|
throw new Error(`format ${format} is not supported`);
|
|
144
165
|
}
|
|
145
166
|
|
|
146
|
-
|
|
167
|
+
// Array clearing now handled in transformThenPush to ensure proper timing
|
|
147
168
|
return writeResult;
|
|
148
169
|
}
|
|
149
170
|
|
|
150
171
|
async function flush() {
|
|
151
172
|
if (arr.length > 0) {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
173
|
+
// Wait for any ongoing writes to complete
|
|
174
|
+
while (isWriting) {
|
|
175
|
+
await new Promise(resolve => setTimeout(resolve, 10));
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
isWriting = true;
|
|
179
|
+
try {
|
|
180
|
+
batch++;
|
|
181
|
+
const writePath = getWritePath();
|
|
182
|
+
const dataToWrite = [...arr];
|
|
183
|
+
arr.length = 0; // Clear array after copying data
|
|
184
|
+
await FILE_CONN(() => writeToDisk(dataToWrite, { writePath }));
|
|
185
|
+
} finally {
|
|
186
|
+
isWriting = false;
|
|
187
|
+
}
|
|
156
188
|
}
|
|
157
189
|
}
|
|
158
190
|
|
|
159
191
|
// Enhance the array with our methods
|
|
160
|
-
|
|
192
|
+
/** @type {HookedArray} */
|
|
193
|
+
const enrichedArray = /** @type {any} */ (arr);
|
|
161
194
|
enrichedArray.hookPush = transformThenPush;
|
|
162
195
|
enrichedArray.flush = flush;
|
|
163
196
|
enrichedArray.getWriteDir = getWriteDir;
|
|
@@ -181,11 +214,12 @@ export class StorageManager {
|
|
|
181
214
|
|
|
182
215
|
/**
|
|
183
216
|
* Initialize all storage containers for the data generation process
|
|
184
|
-
* @returns {
|
|
217
|
+
* @returns {Promise<Storage>} Storage containers object
|
|
185
218
|
*/
|
|
186
219
|
async initializeContainers() {
|
|
187
220
|
const { config } = this.context;
|
|
188
221
|
|
|
222
|
+
/** @type {Storage} */
|
|
189
223
|
const storage = {
|
|
190
224
|
eventData: await createHookArray([], {
|
|
191
225
|
hook: config.hook,
|
|
@@ -207,7 +241,7 @@ export class StorageManager {
|
|
|
207
241
|
|
|
208
242
|
adSpendData: await createHookArray([], {
|
|
209
243
|
hook: config.hook,
|
|
210
|
-
type: "
|
|
244
|
+
type: "ad-spend",
|
|
211
245
|
filepath: `${config.simulationName || 'adspend'}-ADSPEND`,
|
|
212
246
|
format: config.format || "csv",
|
|
213
247
|
concurrency: config.concurrency || 1,
|
|
@@ -267,7 +301,7 @@ export class StorageManager {
|
|
|
267
301
|
hook: config.hook,
|
|
268
302
|
type: "lookup",
|
|
269
303
|
filepath: `${config.simulationName || 'lookup'}-${lookupConfig.key}-LOOKUP`,
|
|
270
|
-
format:
|
|
304
|
+
format: "csv", // Always force CSV for lookup tables
|
|
271
305
|
concurrency: config.concurrency || 1,
|
|
272
306
|
context: this.context
|
|
273
307
|
});
|
package/lib/generators/events.js
CHANGED
|
@@ -102,19 +102,22 @@ export async function makeEvent(
|
|
|
102
102
|
// Set event time using TimeSoup for realistic distribution
|
|
103
103
|
if (earliestTime) {
|
|
104
104
|
if (isFirstEvent) {
|
|
105
|
-
|
|
105
|
+
// Apply time shift to move to present day using precomputed value
|
|
106
|
+
eventTemplate.time = dayjs.unix(earliestTime).add(context.TIME_SHIFT_SECONDS, 'seconds').toISOString();
|
|
106
107
|
} else {
|
|
107
|
-
|
|
108
|
+
// Get time from TimeSoup and apply precomputed time shift
|
|
109
|
+
const soupTime = u.TimeSoup(earliestTime, context.FIXED_NOW, peaks, deviation, mean);
|
|
110
|
+
eventTemplate.time = dayjs(soupTime).add(context.TIME_SHIFT_SECONDS, 'seconds').toISOString();
|
|
108
111
|
}
|
|
109
112
|
}
|
|
110
113
|
|
|
111
114
|
// Add anonymous and session identifiers
|
|
112
115
|
if (anonymousIds.length) {
|
|
113
|
-
eventTemplate.device_id =
|
|
116
|
+
eventTemplate.device_id = u.pickRandom(anonymousIds);
|
|
114
117
|
}
|
|
115
118
|
|
|
116
119
|
if (sessionIds.length) {
|
|
117
|
-
eventTemplate.session_id =
|
|
120
|
+
eventTemplate.session_id = u.pickRandom(sessionIds);
|
|
118
121
|
}
|
|
119
122
|
|
|
120
123
|
// Sometimes add user_id (for attribution modeling)
|
|
@@ -127,16 +130,28 @@ export async function makeEvent(
|
|
|
127
130
|
eventTemplate.user_id = distinct_id;
|
|
128
131
|
}
|
|
129
132
|
|
|
130
|
-
//
|
|
131
|
-
const props = Object.assign({}, chosenEvent.properties, superProps);
|
|
132
|
-
|
|
133
|
+
// PERFORMANCE: Process properties directly without creating intermediate object
|
|
133
134
|
// Add custom properties from event configuration
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
135
|
+
if (chosenEvent.properties) {
|
|
136
|
+
for (const key in chosenEvent.properties) {
|
|
137
|
+
try {
|
|
138
|
+
eventTemplate[key] = u.choose(chosenEvent.properties[key]);
|
|
139
|
+
} catch (e) {
|
|
140
|
+
console.error(`error with ${key} in ${chosenEvent.event} event`, e);
|
|
141
|
+
// Continue processing other properties
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
// Add super properties (override event properties if needed)
|
|
147
|
+
if (superProps) {
|
|
148
|
+
for (const key in superProps) {
|
|
149
|
+
try {
|
|
150
|
+
eventTemplate[key] = u.choose(superProps[key]);
|
|
151
|
+
} catch (e) {
|
|
152
|
+
console.error(`error with ${key} in super props`, e);
|
|
153
|
+
// Continue processing other properties
|
|
154
|
+
}
|
|
140
155
|
}
|
|
141
156
|
}
|
|
142
157
|
|
|
@@ -153,13 +168,21 @@ export async function makeEvent(
|
|
|
153
168
|
const tuple = `${eventTemplate.event}-${eventTemplate.time}-${distinctId}`;
|
|
154
169
|
eventTemplate.insert_id = u.quickHash(tuple);
|
|
155
170
|
|
|
156
|
-
//
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
const
|
|
160
|
-
|
|
171
|
+
// Call hook if configured (before returning the event)
|
|
172
|
+
const { hook } = config;
|
|
173
|
+
if (hook) {
|
|
174
|
+
const hookedEvent = await hook(eventTemplate, "event", {
|
|
175
|
+
user: { distinct_id },
|
|
176
|
+
config
|
|
177
|
+
});
|
|
178
|
+
// If hook returns a modified event, use it; otherwise use original
|
|
179
|
+
if (hookedEvent && typeof hookedEvent === 'object') {
|
|
180
|
+
return hookedEvent;
|
|
181
|
+
}
|
|
161
182
|
}
|
|
162
183
|
|
|
184
|
+
// Note: Time shift already applied above during timestamp calculation
|
|
185
|
+
|
|
163
186
|
return eventTemplate;
|
|
164
187
|
}
|
|
165
188
|
|
|
package/lib/orchestrators/mixpanel-sender.js
CHANGED
|
@@ -58,9 +58,10 @@ export async function sendToMixpanel(context) {
|
|
|
58
58
|
epochEnd: dayjs().unix(),
|
|
59
59
|
dryRun: false,
|
|
60
60
|
abridged: false,
|
|
61
|
-
fixJson:
|
|
61
|
+
fixJson: false,
|
|
62
62
|
showProgress: NODE_ENV === "dev" ? true : false,
|
|
63
|
-
streamFormat: mpImportFormat
|
|
63
|
+
streamFormat: mpImportFormat,
|
|
64
|
+
workers: 35
|
|
64
65
|
};
|
|
65
66
|
|
|
66
67
|
if (isCLI) commonOpts.showProgress = true;
|
|
@@ -134,6 +135,8 @@ export async function sendToMixpanel(context) {
|
|
|
134
135
|
const imported = await mp({ token, groupKey }, groupProfilesToImport, {
|
|
135
136
|
recordType: "group",
|
|
136
137
|
...commonOpts,
|
|
138
|
+
groupKey,
|
|
139
|
+
//dryRun: true
|
|
137
140
|
});
|
|
138
141
|
log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);
|
|
139
142
|
importResults.groups.push(imported);
|