make-mp-data 3.0.2 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dungeons/adspend.js +13 -26
- package/dungeons/anon.js +1 -1
- package/dungeons/array-of-object-lookup.js +1 -2
- package/dungeons/benchmark-heavy.js +5 -6
- package/dungeons/benchmark-light.js +13 -28
- package/dungeons/big.js +3 -3
- package/dungeons/business.js +11 -12
- package/dungeons/complex.js +1 -2
- package/dungeons/copilot.js +8 -6
- package/dungeons/education.js +21 -22
- package/dungeons/experiments.js +4 -5
- package/dungeons/fintech.js +25 -26
- package/dungeons/foobar.js +1 -1
- package/dungeons/food.js +24 -25
- package/dungeons/funnels.js +2 -2
- package/dungeons/gaming.js +39 -40
- package/dungeons/media.js +30 -31
- package/dungeons/mil.js +17 -18
- package/dungeons/mirror.js +2 -3
- package/dungeons/retention-cadence.js +1 -2
- package/dungeons/rpg.js +42 -43
- package/dungeons/sanity.js +1 -2
- package/dungeons/sass.js +32 -33
- package/dungeons/scd.js +3 -4
- package/dungeons/simple.js +13 -14
- package/dungeons/social.js +27 -28
- package/dungeons/soup-test.js +52 -0
- package/dungeons/streaming.js +17 -18
- package/dungeons/student-teacher.js +0 -1
- package/dungeons/text-generation.js +0 -1
- package/dungeons/user-agent.js +1 -2
- package/index.js +18 -6
- package/lib/core/config-validator.js +22 -33
- package/lib/core/context.js +6 -3
- package/lib/generators/events.js +13 -10
- package/lib/generators/funnels.js +7 -4
- package/lib/generators/scd.js +29 -17
- package/lib/generators/text.js +18 -12
- package/lib/orchestrators/mixpanel-sender.js +26 -38
- package/lib/orchestrators/user-loop.js +68 -15
- package/lib/templates/phrases.js +8 -5
- package/lib/utils/function-registry.js +17 -0
- package/lib/utils/utils.js +15 -84
- package/package.json +3 -1
- package/types.d.ts +86 -19
- package/lib/templates/verbose-schema.js +0 -272
- package/lib/utils/chart.js +0 -210
|
@@ -6,7 +6,6 @@
|
|
|
6
6
|
/** @typedef {import('../../types').Context} Context */
|
|
7
7
|
|
|
8
8
|
import dayjs from "dayjs";
|
|
9
|
-
import path from "path";
|
|
10
9
|
import { comma, ls, rm } from "ak-tools";
|
|
11
10
|
import * as u from "../utils/utils.js";
|
|
12
11
|
import mp from "mixpanel-import";
|
|
@@ -22,8 +21,6 @@ export async function sendToMixpanel(context) {
|
|
|
22
21
|
adSpendData,
|
|
23
22
|
eventData,
|
|
24
23
|
groupProfilesData,
|
|
25
|
-
lookupTableData,
|
|
26
|
-
mirrorEventData,
|
|
27
24
|
scdTableData,
|
|
28
25
|
userProfilesData,
|
|
29
26
|
groupEventData
|
|
@@ -41,35 +38,36 @@ export async function sendToMixpanel(context) {
|
|
|
41
38
|
|
|
42
39
|
const importResults = { events: {}, users: {}, groups: [] };
|
|
43
40
|
const isBATCH_MODE = context.isBatchMode();
|
|
44
|
-
|
|
41
|
+
_verbose = config.verbose !== false;
|
|
45
42
|
|
|
46
43
|
/** @type {import('mixpanel-import').Creds} */
|
|
47
44
|
const creds = { token };
|
|
48
45
|
const mpImportFormat = format === "json" ? "jsonl" : "csv";
|
|
49
46
|
|
|
50
|
-
const isDev = NODE_ENV !== 'production';
|
|
51
|
-
|
|
52
47
|
/** @type {import('mixpanel-import').Options} */
|
|
53
48
|
const commonOpts = {
|
|
54
49
|
region,
|
|
55
50
|
fixData: true,
|
|
56
|
-
verbose:
|
|
51
|
+
verbose: false,
|
|
57
52
|
forceStream: true,
|
|
58
53
|
strict: true,
|
|
59
54
|
epochEnd: dayjs().unix(),
|
|
60
55
|
dryRun: false,
|
|
61
56
|
abridged: false,
|
|
62
57
|
fixJson: false,
|
|
63
|
-
showProgress:
|
|
58
|
+
showProgress: !!config.verbose,
|
|
64
59
|
streamFormat: mpImportFormat,
|
|
65
60
|
workers: 35
|
|
66
61
|
};
|
|
67
62
|
|
|
63
|
+
log(`\n${'─'.repeat(50)}`);
|
|
64
|
+
log(` Importing data to Mixpanel (${region})`);
|
|
65
|
+
log(`${'─'.repeat(50)}\n`);
|
|
66
|
+
|
|
68
67
|
// Import events
|
|
69
68
|
if (eventData?.length > 0 || isBATCH_MODE) {
|
|
70
|
-
log(`
|
|
69
|
+
log(` Events`);
|
|
71
70
|
let eventDataToImport = u.deepClone(eventData);
|
|
72
|
-
// Check if we need to read from disk files instead of memory
|
|
73
71
|
const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && eventData && eventData.length === 0);
|
|
74
72
|
if (shouldReadFromFiles && eventData?.getWriteDir) {
|
|
75
73
|
const writeDir = eventData.getWriteDir();
|
|
@@ -81,15 +79,14 @@ export async function sendToMixpanel(context) {
|
|
|
81
79
|
recordType: "event",
|
|
82
80
|
...commonOpts,
|
|
83
81
|
});
|
|
84
|
-
log(
|
|
82
|
+
log(` -> ${comma(imported.success)} events sent\n`);
|
|
85
83
|
importResults.events = imported;
|
|
86
84
|
}
|
|
87
85
|
|
|
88
86
|
// Import user profiles
|
|
89
87
|
if (userProfilesData?.length > 0 || isBATCH_MODE) {
|
|
90
|
-
log(`
|
|
88
|
+
log(` User Profiles`);
|
|
91
89
|
let userProfilesToImport = u.deepClone(userProfilesData);
|
|
92
|
-
// Check if we need to read from disk files instead of memory
|
|
93
90
|
const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && userProfilesData && userProfilesData.length === 0);
|
|
94
91
|
if (shouldReadFromFiles && userProfilesData?.getWriteDir) {
|
|
95
92
|
const writeDir = userProfilesData.getWriteDir();
|
|
@@ -101,15 +98,14 @@ export async function sendToMixpanel(context) {
|
|
|
101
98
|
recordType: "user",
|
|
102
99
|
...commonOpts,
|
|
103
100
|
});
|
|
104
|
-
log(
|
|
101
|
+
log(` -> ${comma(imported.success)} user profiles sent\n`);
|
|
105
102
|
importResults.users = imported;
|
|
106
103
|
}
|
|
107
104
|
|
|
108
105
|
// Import ad spend data
|
|
109
106
|
if (adSpendData?.length > 0 || isBATCH_MODE) {
|
|
110
|
-
log(`
|
|
107
|
+
log(` Ad Spend`);
|
|
111
108
|
let adSpendDataToImport = u.deepClone(adSpendData);
|
|
112
|
-
// Check if we need to read from disk files instead of memory
|
|
113
109
|
const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && adSpendData && adSpendData.length === 0);
|
|
114
110
|
if (shouldReadFromFiles && adSpendData?.getWriteDir) {
|
|
115
111
|
const writeDir = adSpendData.getWriteDir();
|
|
@@ -121,7 +117,7 @@ export async function sendToMixpanel(context) {
|
|
|
121
117
|
recordType: "event",
|
|
122
118
|
...commonOpts,
|
|
123
119
|
});
|
|
124
|
-
log(
|
|
120
|
+
log(` -> ${comma(imported.success)} ad spend events sent\n`);
|
|
125
121
|
importResults.adSpend = imported;
|
|
126
122
|
}
|
|
127
123
|
|
|
@@ -130,9 +126,8 @@ export async function sendToMixpanel(context) {
|
|
|
130
126
|
for (const groupEntity of groupProfilesData) {
|
|
131
127
|
if (!groupEntity || groupEntity.length === 0) continue;
|
|
132
128
|
const groupKey = groupEntity?.groupKey;
|
|
133
|
-
log(`
|
|
129
|
+
log(` Group Profiles (${groupKey})`);
|
|
134
130
|
let groupProfilesToImport = u.deepClone(groupEntity);
|
|
135
|
-
// Check if we need to read from disk files instead of memory
|
|
136
131
|
const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && groupEntity.length === 0);
|
|
137
132
|
if (shouldReadFromFiles && groupEntity?.getWriteDir) {
|
|
138
133
|
const writeDir = groupEntity.getWriteDir();
|
|
@@ -145,16 +140,15 @@ export async function sendToMixpanel(context) {
|
|
|
145
140
|
...commonOpts,
|
|
146
141
|
groupKey,
|
|
147
142
|
});
|
|
148
|
-
log(
|
|
143
|
+
log(` -> ${comma(imported.success)} ${groupKey} profiles sent\n`);
|
|
149
144
|
importResults.groups.push(imported);
|
|
150
145
|
}
|
|
151
146
|
}
|
|
152
147
|
|
|
153
148
|
// Import group events
|
|
154
149
|
if (groupEventData?.length > 0) {
|
|
155
|
-
log(`
|
|
150
|
+
log(` Group Events`);
|
|
156
151
|
let groupEventDataToImport = u.deepClone(groupEventData);
|
|
157
|
-
// Check if we need to read from disk files instead of memory
|
|
158
152
|
const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && groupEventData.length === 0);
|
|
159
153
|
if (shouldReadFromFiles && groupEventData?.getWriteDir) {
|
|
160
154
|
const writeDir = groupEventData.getWriteDir();
|
|
@@ -167,33 +161,28 @@ export async function sendToMixpanel(context) {
|
|
|
167
161
|
...commonOpts,
|
|
168
162
|
strict: false
|
|
169
163
|
});
|
|
170
|
-
log(
|
|
164
|
+
log(` -> ${comma(imported.success)} group events sent\n`);
|
|
171
165
|
importResults.groupEvents = imported;
|
|
172
166
|
}
|
|
173
167
|
|
|
174
168
|
// Import SCD data (requires service account)
|
|
175
169
|
if (serviceAccount && projectId && serviceSecret) {
|
|
176
170
|
if (scdTableData && Array.isArray(scdTableData) && scdTableData.length > 0) {
|
|
177
|
-
log(`importing SCD data to mixpanel...\n`);
|
|
178
171
|
for (const scdEntity of scdTableData) {
|
|
179
172
|
const scdKey = scdEntity?.scdKey;
|
|
180
173
|
const entityType = scdEntity?.entityType || 'user';
|
|
181
|
-
log(`
|
|
174
|
+
log(` SCD: ${scdKey}`);
|
|
182
175
|
let scdDataToImport = u.deepClone(scdEntity);
|
|
183
|
-
// Check if we need to read from disk files instead of memory
|
|
184
176
|
const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && scdEntity && scdEntity.length === 0);
|
|
185
177
|
if (shouldReadFromFiles && scdEntity?.getWriteDir) {
|
|
186
178
|
const writeDir = scdEntity.getWriteDir();
|
|
187
179
|
const files = await ls(writeDir);
|
|
188
180
|
// @ts-ignore
|
|
189
181
|
scdDataToImport = files.filter(f => f.includes(`-${scdKey}-SCD`))?.pop();
|
|
190
|
-
|
|
191
182
|
}
|
|
192
183
|
|
|
193
|
-
// Derive the data type from the actual SCD data
|
|
194
|
-
// todo: we can do better type inference here we don't need to visit the file
|
|
195
184
|
/** @type {"string" | "number" | "boolean"} */
|
|
196
|
-
|
|
185
|
+
let scdType = 'string';
|
|
197
186
|
const scdExamplesValues = context.config.scdProps[Object.keys(context.config.scdProps).find(k => k === scdKey)].values;
|
|
198
187
|
if (scdExamplesValues) {
|
|
199
188
|
if (typeof scdExamplesValues[0] === 'number') {
|
|
@@ -203,8 +192,6 @@ export async function sendToMixpanel(context) {
|
|
|
203
192
|
}
|
|
204
193
|
}
|
|
205
194
|
|
|
206
|
-
|
|
207
|
-
|
|
208
195
|
/** @type {import('mixpanel-import').Options} */
|
|
209
196
|
const options = {
|
|
210
197
|
recordType: "scd",
|
|
@@ -215,12 +202,10 @@ export async function sendToMixpanel(context) {
|
|
|
215
202
|
...commonOpts,
|
|
216
203
|
};
|
|
217
204
|
|
|
218
|
-
// For group SCDs, add the groupKey
|
|
219
205
|
if (entityType !== "user") {
|
|
220
206
|
options.groupKey = entityType;
|
|
221
207
|
}
|
|
222
208
|
|
|
223
|
-
// SCD data is sketch and it shouldn't fail the whole import
|
|
224
209
|
try {
|
|
225
210
|
const imported = await mp(
|
|
226
211
|
{
|
|
@@ -232,16 +217,18 @@ export async function sendToMixpanel(context) {
|
|
|
232
217
|
scdDataToImport,
|
|
233
218
|
options
|
|
234
219
|
);
|
|
235
|
-
log(
|
|
220
|
+
log(` -> ${comma(imported.success)} ${scdKey} SCD entries sent\n`);
|
|
236
221
|
importResults[`${scdKey}_scd`] = imported;
|
|
237
222
|
} catch (err) {
|
|
238
|
-
log(
|
|
223
|
+
log(` !! failed: ${scdKey} SCD — ${err.message}\n`);
|
|
239
224
|
importResults[`${scdKey}_scd`] = { success: 0, failed: 0, error: err.message };
|
|
240
225
|
}
|
|
241
226
|
}
|
|
242
227
|
}
|
|
243
228
|
}
|
|
244
229
|
|
|
230
|
+
log(`${'─'.repeat(50)}\n`);
|
|
231
|
+
|
|
245
232
|
// Clean up batch files if needed
|
|
246
233
|
if (!writeToDisk && isBATCH_MODE) {
|
|
247
234
|
const writeDir = eventData?.getWriteDir?.() || userProfilesData?.getWriteDir?.();
|
|
@@ -265,9 +252,10 @@ export async function sendToMixpanel(context) {
|
|
|
265
252
|
}
|
|
266
253
|
|
|
267
254
|
/**
|
|
268
|
-
*
|
|
255
|
+
* Logging function that respects verbose config
|
|
269
256
|
* @param {string} message - Message to log
|
|
270
257
|
*/
|
|
258
|
+
let _verbose = true;
|
|
271
259
|
function log(message) {
|
|
272
|
-
console.log(message);
|
|
260
|
+
if (_verbose) console.log(message);
|
|
273
261
|
}
|
|
@@ -23,7 +23,7 @@ import { makeSCD } from "../generators/scd.js";
|
|
|
23
23
|
export async function userLoop(context) {
|
|
24
24
|
const { config, storage, defaults } = context;
|
|
25
25
|
const chance = u.getChance();
|
|
26
|
-
const concurrency = config?.concurrency
|
|
26
|
+
const concurrency = config?.concurrency ?? 1;
|
|
27
27
|
const USER_CONN = pLimit(concurrency);
|
|
28
28
|
|
|
29
29
|
const {
|
|
@@ -55,8 +55,19 @@ export async function userLoop(context) {
|
|
|
55
55
|
// Track if we've already logged the strict event count message
|
|
56
56
|
let hasLoggedStrictCountReached = false;
|
|
57
57
|
|
|
58
|
+
// Handle graceful shutdown on SIGINT (Ctrl+C)
|
|
59
|
+
let cancelled = false;
|
|
60
|
+
const onSigint = () => {
|
|
61
|
+
cancelled = true;
|
|
62
|
+
if (verbose) console.log(`\n\nStopping generation (Ctrl+C)...\n`);
|
|
63
|
+
};
|
|
64
|
+
process.on('SIGINT', onSigint);
|
|
65
|
+
|
|
58
66
|
for (let i = 0; i < numUsers; i++) {
|
|
59
67
|
const userPromise = USER_CONN(async () => {
|
|
68
|
+
// Bail out if cancelled
|
|
69
|
+
if (cancelled) return;
|
|
70
|
+
|
|
60
71
|
// Bail out early if strictEventCount is enabled and we've hit numEvents
|
|
61
72
|
if (strictEventCount && context.getEventCount() >= numEvents) {
|
|
62
73
|
if (verbose && !hasLoggedStrictCountReached) {
|
|
@@ -148,18 +159,22 @@ export async function userLoop(context) {
|
|
|
148
159
|
|
|
149
160
|
const userSCD = {};
|
|
150
161
|
for (const [index, key] of scdTableKeys.entries()) {
|
|
151
|
-
const { max =
|
|
162
|
+
const { max = 10 } = scdProps[key];
|
|
152
163
|
const mutations = chance.integer({ min: 1, max });
|
|
153
|
-
|
|
164
|
+
let changes = await makeSCD(context, scdProps[key], key, distinct_id, mutations, created);
|
|
154
165
|
userSCD[key] = changes;
|
|
155
166
|
|
|
156
|
-
await config.hook(changes, "scd-pre", {
|
|
167
|
+
const hookResult = await config.hook(changes, "scd-pre", {
|
|
157
168
|
profile,
|
|
158
169
|
type: 'user',
|
|
159
170
|
scd: { [key]: scdProps[key] },
|
|
160
171
|
config,
|
|
161
172
|
allSCDs: userSCD
|
|
162
173
|
});
|
|
174
|
+
if (Array.isArray(hookResult)) {
|
|
175
|
+
changes = hookResult;
|
|
176
|
+
userSCD[key] = changes;
|
|
177
|
+
}
|
|
163
178
|
}
|
|
164
179
|
|
|
165
180
|
let numEventsThisUserWillPreform = Math.floor(chance.normal({
|
|
@@ -184,39 +199,74 @@ export async function userLoop(context) {
|
|
|
184
199
|
const secondsInDay = 86400;
|
|
185
200
|
const noise = () => chance.integer({ min: 0, max: secondsInDay });
|
|
186
201
|
let usersEvents = [];
|
|
202
|
+
let userConverted = true;
|
|
203
|
+
|
|
204
|
+
// Pre-compute weighted events array for standalone event selection
|
|
205
|
+
const weightedEvents = config.events.reduce((acc, event) => {
|
|
206
|
+
const w = Math.max(1, Math.min(Math.floor(event.weight) || 1, 10));
|
|
207
|
+
for (let i = 0; i < w; i++) acc.push(event);
|
|
208
|
+
return acc;
|
|
209
|
+
}, []);
|
|
210
|
+
|
|
211
|
+
// Build churn event lookup: { eventName: returnLikelihood }
|
|
212
|
+
const churnEvents = new Map();
|
|
213
|
+
for (const ev of config.events) {
|
|
214
|
+
if (ev.isChurnEvent) {
|
|
215
|
+
churnEvents.set(ev.event, ev.returnLikelihood ?? 0);
|
|
216
|
+
}
|
|
217
|
+
}
|
|
187
218
|
|
|
188
219
|
// PATH FOR USERS BORN IN DATASET AND PERFORMING FIRST FUNNEL
|
|
189
220
|
if (firstFunnels.length && userIsBornInDataset) {
|
|
190
221
|
const firstFunnel = chance.pickone(firstFunnels, user);
|
|
191
222
|
const firstTime = adjustedCreated.subtract(noise(), 'seconds').unix();
|
|
192
|
-
const [data,
|
|
223
|
+
const [data, converted] = await makeFunnel(context, firstFunnel, user, firstTime, profile, userSCD);
|
|
224
|
+
userConverted = converted;
|
|
193
225
|
|
|
194
226
|
const timeShift = context.getTimeShift();
|
|
195
227
|
userFirstEventTime = dayjs(data[0].time).subtract(timeShift, 'seconds').unix();
|
|
196
228
|
numEventsPreformed += data.length;
|
|
197
229
|
usersEvents = usersEvents.concat(data);
|
|
198
|
-
|
|
199
|
-
if (!userConverted) {
|
|
200
|
-
return;
|
|
201
|
-
}
|
|
202
230
|
} else {
|
|
203
231
|
userFirstEventTime = adjustedCreated.subtract(noise(), 'seconds').unix();
|
|
204
232
|
}
|
|
205
233
|
|
|
206
|
-
// ALL SUBSEQUENT
|
|
234
|
+
// ALL SUBSEQUENT EVENTS (funnels for converted users, standalone for all)
|
|
235
|
+
let userChurned = false;
|
|
207
236
|
while (numEventsPreformed < numEventsThisUserWillPreform) {
|
|
208
|
-
|
|
237
|
+
let newEvents;
|
|
238
|
+
if (usageFunnels.length && userConverted) {
|
|
209
239
|
const currentFunnel = chance.pickone(usageFunnels);
|
|
210
|
-
const [data,
|
|
240
|
+
const [data, converted] = await makeFunnel(context, currentFunnel, user, userFirstEventTime, profile, userSCD);
|
|
211
241
|
numEventsPreformed += data.length;
|
|
212
|
-
|
|
242
|
+
newEvents = data;
|
|
213
243
|
} else {
|
|
214
|
-
const data = await makeEvent(context, distinct_id, userFirstEventTime, u.pick(
|
|
244
|
+
const data = await makeEvent(context, distinct_id, userFirstEventTime, u.pick(weightedEvents), user.anonymousIds, user.sessionIds, {}, config.groupKeys, true);
|
|
215
245
|
numEventsPreformed++;
|
|
216
|
-
|
|
246
|
+
newEvents = [data];
|
|
247
|
+
}
|
|
248
|
+
usersEvents = usersEvents.concat(newEvents);
|
|
249
|
+
|
|
250
|
+
// Check for churn events — if user churned, they may stop generating
|
|
251
|
+
if (churnEvents.size > 0) {
|
|
252
|
+
const eventsToCheck = Array.isArray(newEvents[0]) ? newEvents.flat() : newEvents;
|
|
253
|
+
for (const ev of eventsToCheck) {
|
|
254
|
+
if (ev.event && churnEvents.has(ev.event)) {
|
|
255
|
+
const returnLikelihood = churnEvents.get(ev.event);
|
|
256
|
+
const userReturns = returnLikelihood > 0 && chance.bool({ likelihood: returnLikelihood * 100 });
|
|
257
|
+
if (!userReturns) {
|
|
258
|
+
userChurned = true;
|
|
259
|
+
break;
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
if (userChurned) break;
|
|
217
264
|
}
|
|
218
265
|
}
|
|
219
266
|
|
|
267
|
+
// Remove events flagged as future timestamps (before dungeon hooks see them)
|
|
268
|
+
usersEvents = usersEvents.filter(e => !e._drop);
|
|
269
|
+
|
|
220
270
|
// Hook for processing all user events
|
|
221
271
|
if (config.hook) {
|
|
222
272
|
const newEvents = await config.hook(usersEvents, "everything", {
|
|
@@ -255,6 +305,9 @@ export async function userLoop(context) {
|
|
|
255
305
|
|
|
256
306
|
// Wait for all users to complete
|
|
257
307
|
await Promise.all(userPromises);
|
|
308
|
+
|
|
309
|
+
// Clean up SIGINT handler
|
|
310
|
+
process.removeListener('SIGINT', onSigint);
|
|
258
311
|
}
|
|
259
312
|
|
|
260
313
|
|
package/lib/templates/phrases.js
CHANGED
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
* Contains real human speech patterns, not templates
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
+
import { getChance } from '../utils/utils.js';
|
|
7
|
+
|
|
6
8
|
// ============= Core Phrase Bank =============
|
|
7
9
|
|
|
8
10
|
export const PHRASE_BANK = {
|
|
@@ -1970,17 +1972,18 @@ export const PHRASE_BANK = {
|
|
|
1970
1972
|
|
|
1971
1973
|
// Helper function for ticket IDs
|
|
1972
1974
|
TICKET_ID: () => {
|
|
1975
|
+
const c = getChance();
|
|
1973
1976
|
const prefix = ['TKT', 'CASE', 'REQ', 'INC', 'BUG'];
|
|
1974
|
-
const p =
|
|
1975
|
-
const num =
|
|
1977
|
+
const p = c.pickone(prefix);
|
|
1978
|
+
const num = c.integer({ min: 10000, max: 99999 });
|
|
1976
1979
|
return `${p}-${num}`;
|
|
1977
1980
|
},
|
|
1978
|
-
|
|
1981
|
+
|
|
1979
1982
|
// Helper functions for random values
|
|
1980
|
-
RAND5: () =>
|
|
1983
|
+
RAND5: () => getChance().integer({ min: 10000, max: 99999 }),
|
|
1981
1984
|
RAND_ERROR: () => {
|
|
1982
1985
|
const errors = ["404 Not Found", "500 Internal Server Error", "403 Forbidden", "Connection Timeout"];
|
|
1983
|
-
return
|
|
1986
|
+
return getChance().pickone(errors);
|
|
1984
1987
|
}
|
|
1985
1988
|
};
|
|
1986
1989
|
|
|
@@ -224,6 +224,23 @@ export const FUNCTION_REGISTRY = {
|
|
|
224
224
|
description: 'Generate Android device ID'
|
|
225
225
|
},
|
|
226
226
|
|
|
227
|
+
// Commonly used utility functions from dungeons
|
|
228
|
+
pickAWinner: {
|
|
229
|
+
minArgs: 1,
|
|
230
|
+
maxArgs: 2,
|
|
231
|
+
description: 'Pick from array with power-law weighting (most common values first)'
|
|
232
|
+
},
|
|
233
|
+
weighChoices: {
|
|
234
|
+
minArgs: 1,
|
|
235
|
+
maxArgs: 1,
|
|
236
|
+
description: 'Weight choices by frequency in array (more duplicates = higher weight)'
|
|
237
|
+
},
|
|
238
|
+
decimal: {
|
|
239
|
+
minArgs: 0,
|
|
240
|
+
maxArgs: 3,
|
|
241
|
+
description: 'Generate random decimal (min, max, fixed decimal places)'
|
|
242
|
+
},
|
|
243
|
+
|
|
227
244
|
// Special function for arrow functions
|
|
228
245
|
arrow: {
|
|
229
246
|
minArgs: 1,
|
package/lib/utils/utils.js
CHANGED
|
@@ -2,7 +2,6 @@ import fs from 'fs';
|
|
|
2
2
|
import Chance from 'chance';
|
|
3
3
|
import readline from 'readline';
|
|
4
4
|
import { comma, uid } from 'ak-tools';
|
|
5
|
-
import { spawn } from 'child_process';
|
|
6
5
|
import dayjs from 'dayjs';
|
|
7
6
|
import utc from 'dayjs/plugin/utc.js';
|
|
8
7
|
import path from 'path';
|
|
@@ -474,7 +473,7 @@ function companyName(words = 2, separator = " ") {
|
|
|
474
473
|
const cycle = [industryAdjectives, companyNouns];
|
|
475
474
|
for (let i = 0; i < words; i++) {
|
|
476
475
|
const index = i % cycle.length;
|
|
477
|
-
const word = cycle[index][
|
|
476
|
+
const word = cycle[index][getChance().integer({ min: 0, max: cycle[index].length - 1 })];
|
|
478
477
|
if (name === "") {
|
|
479
478
|
name = word;
|
|
480
479
|
} else {
|
|
@@ -873,9 +872,8 @@ function shuffleArray(array) {
|
|
|
873
872
|
|
|
874
873
|
function pickRandom(array) {
|
|
875
874
|
if (!array || array.length === 0) return undefined;
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
return array[randomIndex];
|
|
875
|
+
const chance = getChance();
|
|
876
|
+
return chance.pickone(array);
|
|
879
877
|
}
|
|
880
878
|
|
|
881
879
|
function shuffleExceptFirst(array) {
|
|
@@ -1129,15 +1127,6 @@ function progress(arrayOfArrays) {
|
|
|
1129
1127
|
process.stdout.write(finalMessage);
|
|
1130
1128
|
}
|
|
1131
1129
|
|
|
1132
|
-
function openFinder(path, callback) {
|
|
1133
|
-
path = path || '/';
|
|
1134
|
-
let p = spawn('open', [path]);
|
|
1135
|
-
p.on('error', (err) => {
|
|
1136
|
-
p.kill();
|
|
1137
|
-
return callback(err);
|
|
1138
|
-
});
|
|
1139
|
-
};
|
|
1140
|
-
|
|
1141
1130
|
function getUniqueKeys(data) {
|
|
1142
1131
|
const keysSet = new Set();
|
|
1143
1132
|
data.forEach(item => {
|
|
@@ -1186,57 +1175,20 @@ let soupHits = 0;
|
|
|
1186
1175
|
* @param {number} [latestTime]
|
|
1187
1176
|
* @param {number} [peaks=5]
|
|
1188
1177
|
*/
|
|
1178
|
+
/**
|
|
1179
|
+
* Generates a timestamp within a time range using clustered normal distributions.
|
|
1180
|
+
* Divides the range into `peaks` chunks, picks one randomly, then samples within it.
|
|
1181
|
+
* Returns unix seconds (not ISO string) for performance — caller converts once.
|
|
1182
|
+
*/
|
|
1189
1183
|
function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0) {
|
|
1190
|
-
if (!earliestTime) earliestTime = global.FIXED_BEGIN ? global.FIXED_BEGIN : dayjs().subtract(30, 'd').unix();
|
|
1191
|
-
if (!latestTime) latestTime = global.FIXED_NOW ? global.FIXED_NOW : dayjs().unix();
|
|
1192
|
-
const chance = getChance();
|
|
1193
|
-
const totalRange = latestTime - earliestTime;
|
|
1194
|
-
const chunkSize = totalRange / peaks;
|
|
1195
|
-
|
|
1196
|
-
// Select a random chunk based on the number of peaks
|
|
1197
|
-
const peakIndex = integer(0, peaks - 1);
|
|
1198
|
-
const chunkStart = earliestTime + peakIndex * chunkSize;
|
|
1199
|
-
const chunkEnd = chunkStart + chunkSize;
|
|
1200
|
-
const chunkMid = (chunkStart + chunkEnd) / 2;
|
|
1201
|
-
|
|
1202
|
-
// Generate a single timestamp within this chunk using a normal distribution centered at chunkMid
|
|
1203
|
-
let offset;
|
|
1204
|
-
let iterations = 0;
|
|
1205
|
-
let isValidTime = false;
|
|
1206
|
-
do {
|
|
1207
|
-
iterations++;
|
|
1208
|
-
soupHits++;
|
|
1209
|
-
offset = chance.normal({ mean: mean, dev: chunkSize / deviation });
|
|
1210
|
-
isValidTime = validTime(chunkMid + offset, earliestTime, latestTime);
|
|
1211
|
-
if (iterations > 25000) {
|
|
1212
|
-
throw `${iterations} iterations... exceeded`;
|
|
1213
|
-
}
|
|
1214
|
-
} while (chunkMid + offset < chunkStart || chunkMid + offset > chunkEnd);
|
|
1215
|
-
|
|
1216
|
-
try {
|
|
1217
|
-
return dayjs.unix(chunkMid + offset).toISOString();
|
|
1218
|
-
}
|
|
1219
|
-
|
|
1220
|
-
catch (e) {
|
|
1221
|
-
//escape hatch
|
|
1222
|
-
// console.log('BAD TIME', e?.message);
|
|
1223
|
-
if (NODE_ENV === 'dev') debugger;
|
|
1224
|
-
return dayjs.unix(integer(earliestTime, latestTime)).toISOString();
|
|
1225
|
-
}
|
|
1226
|
-
}
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
function NewTimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0) {
|
|
1230
|
-
if (!earliestTime) earliestTime = global.FIXED_BEGIN ? global.FIXED_BEGIN : dayjs().subtract(30, 'd').unix(); // 30 days ago
|
|
1184
|
+
if (!earliestTime) earliestTime = global.FIXED_BEGIN ? global.FIXED_BEGIN : dayjs().subtract(30, 'd').unix();
|
|
1231
1185
|
if (!latestTime) latestTime = global.FIXED_NOW ? global.FIXED_NOW : dayjs().unix();
|
|
1232
1186
|
const chance = getChance();
|
|
1233
1187
|
let totalRange = latestTime - earliestTime;
|
|
1234
1188
|
if (totalRange <= 0 || earliestTime > latestTime) {
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
earliestTime = tempEarly;
|
|
1239
|
-
latestTime = tempLate;
|
|
1189
|
+
const temp = latestTime;
|
|
1190
|
+
latestTime = earliestTime;
|
|
1191
|
+
earliestTime = temp;
|
|
1240
1192
|
totalRange = latestTime - earliestTime;
|
|
1241
1193
|
}
|
|
1242
1194
|
const chunkSize = totalRange / peaks;
|
|
@@ -1247,29 +1199,15 @@ function NewTimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean =
|
|
|
1247
1199
|
const chunkEnd = chunkStart + chunkSize;
|
|
1248
1200
|
const chunkMid = (chunkStart + chunkEnd) / 2;
|
|
1249
1201
|
|
|
1250
|
-
//
|
|
1202
|
+
// Generate offset from normal distribution, clamp to chunk boundaries
|
|
1251
1203
|
const maxDeviation = chunkSize / deviation;
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
// Clamp to chunk boundaries to prevent infinite loops
|
|
1204
|
+
const offset = chance.normal({ mean: mean, dev: maxDeviation });
|
|
1255
1205
|
const proposedTime = chunkMid + offset;
|
|
1256
1206
|
const clampedTime = Math.max(chunkStart, Math.min(chunkEnd, proposedTime));
|
|
1257
|
-
|
|
1258
|
-
// Ensure it's within the overall valid range
|
|
1259
1207
|
const finalTime = Math.max(earliestTime, Math.min(latestTime, clampedTime));
|
|
1260
1208
|
|
|
1261
|
-
// Update soup hits counter (keep for compatibility)
|
|
1262
1209
|
soupHits++;
|
|
1263
|
-
|
|
1264
|
-
try {
|
|
1265
|
-
return dayjs.unix(finalTime).toISOString();
|
|
1266
|
-
}
|
|
1267
|
-
|
|
1268
|
-
catch (e) {
|
|
1269
|
-
//escape hatch
|
|
1270
|
-
// console.log('BAD TIME', e?.message);
|
|
1271
|
-
return dayjs.unix(integer(earliestTime, latestTime)).toISOString();
|
|
1272
|
-
}
|
|
1210
|
+
return finalTime;
|
|
1273
1211
|
}
|
|
1274
1212
|
|
|
1275
1213
|
|
|
@@ -1406,11 +1344,6 @@ function wrapFunc(obj, func, recursion = 0, parentKey = null, grandParentKey = n
|
|
|
1406
1344
|
|
|
1407
1345
|
// }
|
|
1408
1346
|
|
|
1409
|
-
const chance = getChance();
|
|
1410
|
-
function odds(num) {
|
|
1411
|
-
return chance.bool({ likelihood: num });
|
|
1412
|
-
}
|
|
1413
|
-
|
|
1414
1347
|
/**
|
|
1415
1348
|
* makes a random-sized array of emojis
|
|
1416
1349
|
* @param {number} max=10
|
|
@@ -1505,7 +1438,6 @@ export {
|
|
|
1505
1438
|
initChance,
|
|
1506
1439
|
getChance,
|
|
1507
1440
|
decimal,
|
|
1508
|
-
odds,
|
|
1509
1441
|
validTime,
|
|
1510
1442
|
validEvent,
|
|
1511
1443
|
|
|
@@ -1515,7 +1447,6 @@ export {
|
|
|
1515
1447
|
weighNumRange,
|
|
1516
1448
|
progress,
|
|
1517
1449
|
range,
|
|
1518
|
-
openFinder,
|
|
1519
1450
|
getUniqueKeys,
|
|
1520
1451
|
person,
|
|
1521
1452
|
pickAWinner,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "make-mp-data",
|
|
3
|
-
"version": "3.0.
|
|
3
|
+
"version": "3.0.4",
|
|
4
4
|
"description": "builds all mixpanel primitives for a given project",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
"scripts": {
|
|
19
19
|
"post": "npm publish",
|
|
20
20
|
"test": "NODE_ENV=test vitest run",
|
|
21
|
+
"test:coverage": "NODE_ENV=test vitest run --coverage",
|
|
21
22
|
"typecheck": "tsc --noEmit",
|
|
22
23
|
"dev": "nodemon scratch.mjs --ignore ./data/*",
|
|
23
24
|
"prune": "rm -f ./data/* && rm -f ./tmp/* && rm -f vscode-profile-*",
|
|
@@ -69,6 +70,7 @@
|
|
|
69
70
|
"yargs": "^17.7.2"
|
|
70
71
|
},
|
|
71
72
|
"devDependencies": {
|
|
73
|
+
"@vitest/coverage-v8": "^2.1.9",
|
|
72
74
|
"@vitest/ui": "^2.1.9",
|
|
73
75
|
"nodemon": "^3.1.3",
|
|
74
76
|
"typescript": "^5.6.0",
|