make-mp-data 3.0.2 → 3.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. package/dungeons/adspend.js +13 -26
  2. package/dungeons/anon.js +1 -1
  3. package/dungeons/array-of-object-lookup.js +1 -2
  4. package/dungeons/benchmark-heavy.js +5 -6
  5. package/dungeons/benchmark-light.js +13 -28
  6. package/dungeons/big.js +3 -3
  7. package/dungeons/business.js +11 -12
  8. package/dungeons/complex.js +1 -2
  9. package/dungeons/copilot.js +8 -6
  10. package/dungeons/education.js +21 -22
  11. package/dungeons/experiments.js +4 -5
  12. package/dungeons/fintech.js +25 -26
  13. package/dungeons/foobar.js +1 -1
  14. package/dungeons/food.js +24 -25
  15. package/dungeons/funnels.js +2 -2
  16. package/dungeons/gaming.js +39 -40
  17. package/dungeons/media.js +30 -31
  18. package/dungeons/mil.js +17 -18
  19. package/dungeons/mirror.js +2 -3
  20. package/dungeons/retention-cadence.js +1 -2
  21. package/dungeons/rpg.js +42 -43
  22. package/dungeons/sanity.js +1 -2
  23. package/dungeons/sass.js +32 -33
  24. package/dungeons/scd.js +3 -4
  25. package/dungeons/simple.js +13 -14
  26. package/dungeons/social.js +27 -28
  27. package/dungeons/soup-test.js +52 -0
  28. package/dungeons/streaming.js +17 -18
  29. package/dungeons/student-teacher.js +0 -1
  30. package/dungeons/text-generation.js +0 -1
  31. package/dungeons/user-agent.js +1 -2
  32. package/index.js +18 -6
  33. package/lib/core/config-validator.js +22 -33
  34. package/lib/core/context.js +6 -3
  35. package/lib/generators/events.js +13 -10
  36. package/lib/generators/funnels.js +7 -4
  37. package/lib/generators/scd.js +29 -17
  38. package/lib/generators/text.js +18 -12
  39. package/lib/orchestrators/mixpanel-sender.js +26 -38
  40. package/lib/orchestrators/user-loop.js +68 -15
  41. package/lib/templates/phrases.js +8 -5
  42. package/lib/utils/function-registry.js +17 -0
  43. package/lib/utils/utils.js +15 -84
  44. package/package.json +3 -1
  45. package/types.d.ts +86 -19
  46. package/lib/templates/verbose-schema.js +0 -272
  47. package/lib/utils/chart.js +0 -210
@@ -6,7 +6,6 @@
6
6
  /** @typedef {import('../../types').Context} Context */
7
7
 
8
8
  import dayjs from "dayjs";
9
- import path from "path";
10
9
  import { comma, ls, rm } from "ak-tools";
11
10
  import * as u from "../utils/utils.js";
12
11
  import mp from "mixpanel-import";
@@ -22,8 +21,6 @@ export async function sendToMixpanel(context) {
22
21
  adSpendData,
23
22
  eventData,
24
23
  groupProfilesData,
25
- lookupTableData,
26
- mirrorEventData,
27
24
  scdTableData,
28
25
  userProfilesData,
29
26
  groupEventData
@@ -41,35 +38,36 @@ export async function sendToMixpanel(context) {
41
38
 
42
39
  const importResults = { events: {}, users: {}, groups: [] };
43
40
  const isBATCH_MODE = context.isBatchMode();
44
- const NODE_ENV = process.env.NODE_ENV || "unknown";
41
+ _verbose = config.verbose !== false;
45
42
 
46
43
  /** @type {import('mixpanel-import').Creds} */
47
44
  const creds = { token };
48
45
  const mpImportFormat = format === "json" ? "jsonl" : "csv";
49
46
 
50
- const isDev = NODE_ENV !== 'production';
51
-
52
47
  /** @type {import('mixpanel-import').Options} */
53
48
  const commonOpts = {
54
49
  region,
55
50
  fixData: true,
56
- verbose: isDev,
51
+ verbose: false,
57
52
  forceStream: true,
58
53
  strict: true,
59
54
  epochEnd: dayjs().unix(),
60
55
  dryRun: false,
61
56
  abridged: false,
62
57
  fixJson: false,
63
- showProgress: isDev,
58
+ showProgress: !!config.verbose,
64
59
  streamFormat: mpImportFormat,
65
60
  workers: 35
66
61
  };
67
62
 
63
+ log(`\n${'─'.repeat(50)}`);
64
+ log(` Importing data to Mixpanel (${region})`);
65
+ log(`${'─'.repeat(50)}\n`);
66
+
68
67
  // Import events
69
68
  if (eventData?.length > 0 || isBATCH_MODE) {
70
- log(`importing events to mixpanel...\n`);
69
+ log(` Events`);
71
70
  let eventDataToImport = u.deepClone(eventData);
72
- // Check if we need to read from disk files instead of memory
73
71
  const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && eventData && eventData.length === 0);
74
72
  if (shouldReadFromFiles && eventData?.getWriteDir) {
75
73
  const writeDir = eventData.getWriteDir();
@@ -81,15 +79,14 @@ export async function sendToMixpanel(context) {
81
79
  recordType: "event",
82
80
  ...commonOpts,
83
81
  });
84
- log(`\tsent ${comma(imported.success)} events\n`);
82
+ log(` -> ${comma(imported.success)} events sent\n`);
85
83
  importResults.events = imported;
86
84
  }
87
85
 
88
86
  // Import user profiles
89
87
  if (userProfilesData?.length > 0 || isBATCH_MODE) {
90
- log(`importing user profiles to mixpanel...\n`);
88
+ log(` User Profiles`);
91
89
  let userProfilesToImport = u.deepClone(userProfilesData);
92
- // Check if we need to read from disk files instead of memory
93
90
  const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && userProfilesData && userProfilesData.length === 0);
94
91
  if (shouldReadFromFiles && userProfilesData?.getWriteDir) {
95
92
  const writeDir = userProfilesData.getWriteDir();
@@ -101,15 +98,14 @@ export async function sendToMixpanel(context) {
101
98
  recordType: "user",
102
99
  ...commonOpts,
103
100
  });
104
- log(`\tsent ${comma(imported.success)} user profiles\n`);
101
+ log(` -> ${comma(imported.success)} user profiles sent\n`);
105
102
  importResults.users = imported;
106
103
  }
107
104
 
108
105
  // Import ad spend data
109
106
  if (adSpendData?.length > 0 || isBATCH_MODE) {
110
- log(`importing ad spend data to mixpanel...\n`);
107
+ log(` Ad Spend`);
111
108
  let adSpendDataToImport = u.deepClone(adSpendData);
112
- // Check if we need to read from disk files instead of memory
113
109
  const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && adSpendData && adSpendData.length === 0);
114
110
  if (shouldReadFromFiles && adSpendData?.getWriteDir) {
115
111
  const writeDir = adSpendData.getWriteDir();
@@ -121,7 +117,7 @@ export async function sendToMixpanel(context) {
121
117
  recordType: "event",
122
118
  ...commonOpts,
123
119
  });
124
- log(`\tsent ${comma(imported.success)} ad spend events\n`);
120
+ log(` -> ${comma(imported.success)} ad spend events sent\n`);
125
121
  importResults.adSpend = imported;
126
122
  }
127
123
 
@@ -130,9 +126,8 @@ export async function sendToMixpanel(context) {
130
126
  for (const groupEntity of groupProfilesData) {
131
127
  if (!groupEntity || groupEntity.length === 0) continue;
132
128
  const groupKey = groupEntity?.groupKey;
133
- log(`importing ${groupKey} profiles to mixpanel...\n`);
129
+ log(` Group Profiles (${groupKey})`);
134
130
  let groupProfilesToImport = u.deepClone(groupEntity);
135
- // Check if we need to read from disk files instead of memory
136
131
  const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && groupEntity.length === 0);
137
132
  if (shouldReadFromFiles && groupEntity?.getWriteDir) {
138
133
  const writeDir = groupEntity.getWriteDir();
@@ -145,16 +140,15 @@ export async function sendToMixpanel(context) {
145
140
  ...commonOpts,
146
141
  groupKey,
147
142
  });
148
- log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);
143
+ log(` -> ${comma(imported.success)} ${groupKey} profiles sent\n`);
149
144
  importResults.groups.push(imported);
150
145
  }
151
146
  }
152
147
 
153
148
  // Import group events
154
149
  if (groupEventData?.length > 0) {
155
- log(`importing group events to mixpanel...\n`);
150
+ log(` Group Events`);
156
151
  let groupEventDataToImport = u.deepClone(groupEventData);
157
- // Check if we need to read from disk files instead of memory
158
152
  const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && groupEventData.length === 0);
159
153
  if (shouldReadFromFiles && groupEventData?.getWriteDir) {
160
154
  const writeDir = groupEventData.getWriteDir();
@@ -167,33 +161,28 @@ export async function sendToMixpanel(context) {
167
161
  ...commonOpts,
168
162
  strict: false
169
163
  });
170
- log(`\tsent ${comma(imported.success)} group events\n`);
164
+ log(` -> ${comma(imported.success)} group events sent\n`);
171
165
  importResults.groupEvents = imported;
172
166
  }
173
167
 
174
168
  // Import SCD data (requires service account)
175
169
  if (serviceAccount && projectId && serviceSecret) {
176
170
  if (scdTableData && Array.isArray(scdTableData) && scdTableData.length > 0) {
177
- log(`importing SCD data to mixpanel...\n`);
178
171
  for (const scdEntity of scdTableData) {
179
172
  const scdKey = scdEntity?.scdKey;
180
173
  const entityType = scdEntity?.entityType || 'user';
181
- log(`importing ${scdKey} SCD data to mixpanel...\n`);
174
+ log(` SCD: ${scdKey}`);
182
175
  let scdDataToImport = u.deepClone(scdEntity);
183
- // Check if we need to read from disk files instead of memory
184
176
  const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && scdEntity && scdEntity.length === 0);
185
177
  if (shouldReadFromFiles && scdEntity?.getWriteDir) {
186
178
  const writeDir = scdEntity.getWriteDir();
187
179
  const files = await ls(writeDir);
188
180
  // @ts-ignore
189
181
  scdDataToImport = files.filter(f => f.includes(`-${scdKey}-SCD`))?.pop();
190
-
191
182
  }
192
183
 
193
- // Derive the data type from the actual SCD data
194
- // todo: we can do better type inference here we don't need to visit the file
195
184
  /** @type {"string" | "number" | "boolean"} */
196
- let scdType = 'string'; // default to string
185
+ let scdType = 'string';
197
186
  const scdExamplesValues = context.config.scdProps[Object.keys(context.config.scdProps).find(k => k === scdKey)].values;
198
187
  if (scdExamplesValues) {
199
188
  if (typeof scdExamplesValues[0] === 'number') {
@@ -203,8 +192,6 @@ export async function sendToMixpanel(context) {
203
192
  }
204
193
  }
205
194
 
206
-
207
-
208
195
  /** @type {import('mixpanel-import').Options} */
209
196
  const options = {
210
197
  recordType: "scd",
@@ -215,12 +202,10 @@ export async function sendToMixpanel(context) {
215
202
  ...commonOpts,
216
203
  };
217
204
 
218
- // For group SCDs, add the groupKey
219
205
  if (entityType !== "user") {
220
206
  options.groupKey = entityType;
221
207
  }
222
208
 
223
- // SCD data is sketch and it shouldn't fail the whole import
224
209
  try {
225
210
  const imported = await mp(
226
211
  {
@@ -232,16 +217,18 @@ export async function sendToMixpanel(context) {
232
217
  scdDataToImport,
233
218
  options
234
219
  );
235
- log(`\tsent ${comma(imported.success)} ${scdKey} SCD data\n`);
220
+ log(` -> ${comma(imported.success)} ${scdKey} SCD entries sent\n`);
236
221
  importResults[`${scdKey}_scd`] = imported;
237
222
  } catch (err) {
238
- log(`\tfailed to import ${scdKey} SCD data: ${err.message}\n`);
223
+ log(` !! failed: ${scdKey} SCD ${err.message}\n`);
239
224
  importResults[`${scdKey}_scd`] = { success: 0, failed: 0, error: err.message };
240
225
  }
241
226
  }
242
227
  }
243
228
  }
244
229
 
230
+ log(`${'─'.repeat(50)}\n`);
231
+
245
232
  // Clean up batch files if needed
246
233
  if (!writeToDisk && isBATCH_MODE) {
247
234
  const writeDir = eventData?.getWriteDir?.() || userProfilesData?.getWriteDir?.();
@@ -265,9 +252,10 @@ export async function sendToMixpanel(context) {
265
252
  }
266
253
 
267
254
  /**
268
- * Simple logging function
255
+ * Logging function that respects verbose config
269
256
  * @param {string} message - Message to log
270
257
  */
258
+ let _verbose = true;
271
259
  function log(message) {
272
- console.log(message);
260
+ if (_verbose) console.log(message);
273
261
  }
@@ -23,7 +23,7 @@ import { makeSCD } from "../generators/scd.js";
23
23
  export async function userLoop(context) {
24
24
  const { config, storage, defaults } = context;
25
25
  const chance = u.getChance();
26
- const concurrency = config?.concurrency || Math.min(os.cpus().length * 2, 16);
26
+ const concurrency = config?.concurrency ?? 1;
27
27
  const USER_CONN = pLimit(concurrency);
28
28
 
29
29
  const {
@@ -55,8 +55,19 @@ export async function userLoop(context) {
55
55
  // Track if we've already logged the strict event count message
56
56
  let hasLoggedStrictCountReached = false;
57
57
 
58
+ // Handle graceful shutdown on SIGINT (Ctrl+C)
59
+ let cancelled = false;
60
+ const onSigint = () => {
61
+ cancelled = true;
62
+ if (verbose) console.log(`\n\nStopping generation (Ctrl+C)...\n`);
63
+ };
64
+ process.on('SIGINT', onSigint);
65
+
58
66
  for (let i = 0; i < numUsers; i++) {
59
67
  const userPromise = USER_CONN(async () => {
68
+ // Bail out if cancelled
69
+ if (cancelled) return;
70
+
60
71
  // Bail out early if strictEventCount is enabled and we've hit numEvents
61
72
  if (strictEventCount && context.getEventCount() >= numEvents) {
62
73
  if (verbose && !hasLoggedStrictCountReached) {
@@ -148,18 +159,22 @@ export async function userLoop(context) {
148
159
 
149
160
  const userSCD = {};
150
161
  for (const [index, key] of scdTableKeys.entries()) {
151
- const { max = 100 } = scdProps[key];
162
+ const { max = 10 } = scdProps[key];
152
163
  const mutations = chance.integer({ min: 1, max });
153
- const changes = await makeSCD(context, scdProps[key], key, distinct_id, mutations, created);
164
+ let changes = await makeSCD(context, scdProps[key], key, distinct_id, mutations, created);
154
165
  userSCD[key] = changes;
155
166
 
156
- await config.hook(changes, "scd-pre", {
167
+ const hookResult = await config.hook(changes, "scd-pre", {
157
168
  profile,
158
169
  type: 'user',
159
170
  scd: { [key]: scdProps[key] },
160
171
  config,
161
172
  allSCDs: userSCD
162
173
  });
174
+ if (Array.isArray(hookResult)) {
175
+ changes = hookResult;
176
+ userSCD[key] = changes;
177
+ }
163
178
  }
164
179
 
165
180
  let numEventsThisUserWillPreform = Math.floor(chance.normal({
@@ -184,39 +199,74 @@ export async function userLoop(context) {
184
199
  const secondsInDay = 86400;
185
200
  const noise = () => chance.integer({ min: 0, max: secondsInDay });
186
201
  let usersEvents = [];
202
+ let userConverted = true;
203
+
204
+ // Pre-compute weighted events array for standalone event selection
205
+ const weightedEvents = config.events.reduce((acc, event) => {
206
+ const w = Math.max(1, Math.min(Math.floor(event.weight) || 1, 10));
207
+ for (let i = 0; i < w; i++) acc.push(event);
208
+ return acc;
209
+ }, []);
210
+
211
+ // Build churn event lookup: { eventName: returnLikelihood }
212
+ const churnEvents = new Map();
213
+ for (const ev of config.events) {
214
+ if (ev.isChurnEvent) {
215
+ churnEvents.set(ev.event, ev.returnLikelihood ?? 0);
216
+ }
217
+ }
187
218
 
188
219
  // PATH FOR USERS BORN IN DATASET AND PERFORMING FIRST FUNNEL
189
220
  if (firstFunnels.length && userIsBornInDataset) {
190
221
  const firstFunnel = chance.pickone(firstFunnels, user);
191
222
  const firstTime = adjustedCreated.subtract(noise(), 'seconds').unix();
192
- const [data, userConverted] = await makeFunnel(context, firstFunnel, user, firstTime, profile, userSCD);
223
+ const [data, converted] = await makeFunnel(context, firstFunnel, user, firstTime, profile, userSCD);
224
+ userConverted = converted;
193
225
 
194
226
  const timeShift = context.getTimeShift();
195
227
  userFirstEventTime = dayjs(data[0].time).subtract(timeShift, 'seconds').unix();
196
228
  numEventsPreformed += data.length;
197
229
  usersEvents = usersEvents.concat(data);
198
-
199
- if (!userConverted) {
200
- return;
201
- }
202
230
  } else {
203
231
  userFirstEventTime = adjustedCreated.subtract(noise(), 'seconds').unix();
204
232
  }
205
233
 
206
- // ALL SUBSEQUENT FUNNELS
234
+ // ALL SUBSEQUENT EVENTS (funnels for converted users, standalone for all)
235
+ let userChurned = false;
207
236
  while (numEventsPreformed < numEventsThisUserWillPreform) {
208
- if (usageFunnels.length) {
237
+ let newEvents;
238
+ if (usageFunnels.length && userConverted) {
209
239
  const currentFunnel = chance.pickone(usageFunnels);
210
- const [data, userConverted] = await makeFunnel(context, currentFunnel, user, userFirstEventTime, profile, userSCD);
240
+ const [data, converted] = await makeFunnel(context, currentFunnel, user, userFirstEventTime, profile, userSCD);
211
241
  numEventsPreformed += data.length;
212
- usersEvents = usersEvents.concat(data);
242
+ newEvents = data;
213
243
  } else {
214
- const data = await makeEvent(context, distinct_id, userFirstEventTime, u.pick(config.events), user.anonymousIds, user.sessionIds, {}, config.groupKeys, true);
244
+ const data = await makeEvent(context, distinct_id, userFirstEventTime, u.pick(weightedEvents), user.anonymousIds, user.sessionIds, {}, config.groupKeys, true);
215
245
  numEventsPreformed++;
216
- usersEvents = usersEvents.concat(data);
246
+ newEvents = [data];
247
+ }
248
+ usersEvents = usersEvents.concat(newEvents);
249
+
250
+ // Check for churn events — if user churned, they may stop generating
251
+ if (churnEvents.size > 0) {
252
+ const eventsToCheck = Array.isArray(newEvents[0]) ? newEvents.flat() : newEvents;
253
+ for (const ev of eventsToCheck) {
254
+ if (ev.event && churnEvents.has(ev.event)) {
255
+ const returnLikelihood = churnEvents.get(ev.event);
256
+ const userReturns = returnLikelihood > 0 && chance.bool({ likelihood: returnLikelihood * 100 });
257
+ if (!userReturns) {
258
+ userChurned = true;
259
+ break;
260
+ }
261
+ }
262
+ }
263
+ if (userChurned) break;
217
264
  }
218
265
  }
219
266
 
267
+ // Remove events flagged as future timestamps (before dungeon hooks see them)
268
+ usersEvents = usersEvents.filter(e => !e._drop);
269
+
220
270
  // Hook for processing all user events
221
271
  if (config.hook) {
222
272
  const newEvents = await config.hook(usersEvents, "everything", {
@@ -255,6 +305,9 @@ export async function userLoop(context) {
255
305
 
256
306
  // Wait for all users to complete
257
307
  await Promise.all(userPromises);
308
+
309
+ // Clean up SIGINT handler
310
+ process.removeListener('SIGINT', onSigint);
258
311
  }
259
312
 
260
313
 
@@ -3,6 +3,8 @@
3
3
  * Contains real human speech patterns, not templates
4
4
  */
5
5
 
6
+ import { getChance } from '../utils/utils.js';
7
+
6
8
  // ============= Core Phrase Bank =============
7
9
 
8
10
  export const PHRASE_BANK = {
@@ -1970,17 +1972,18 @@ export const PHRASE_BANK = {
1970
1972
 
1971
1973
  // Helper function for ticket IDs
1972
1974
  TICKET_ID: () => {
1975
+ const c = getChance();
1973
1976
  const prefix = ['TKT', 'CASE', 'REQ', 'INC', 'BUG'];
1974
- const p = prefix[Math.floor(Math.random() * prefix.length)];
1975
- const num = Math.floor(10000 + Math.random() * 90000);
1977
+ const p = c.pickone(prefix);
1978
+ const num = c.integer({ min: 10000, max: 99999 });
1976
1979
  return `${p}-${num}`;
1977
1980
  },
1978
-
1981
+
1979
1982
  // Helper functions for random values
1980
- RAND5: () => Math.floor(10000 + Math.random() * 90000),
1983
+ RAND5: () => getChance().integer({ min: 10000, max: 99999 }),
1981
1984
  RAND_ERROR: () => {
1982
1985
  const errors = ["404 Not Found", "500 Internal Server Error", "403 Forbidden", "Connection Timeout"];
1983
- return errors[Math.floor(Math.random() * errors.length)];
1986
+ return getChance().pickone(errors);
1984
1987
  }
1985
1988
  };
1986
1989
 
@@ -224,6 +224,23 @@ export const FUNCTION_REGISTRY = {
224
224
  description: 'Generate Android device ID'
225
225
  },
226
226
 
227
+ // Commonly used utility functions from dungeons
228
+ pickAWinner: {
229
+ minArgs: 1,
230
+ maxArgs: 2,
231
+ description: 'Pick from array with power-law weighting (most common values first)'
232
+ },
233
+ weighChoices: {
234
+ minArgs: 1,
235
+ maxArgs: 1,
236
+ description: 'Weight choices by frequency in array (more duplicates = higher weight)'
237
+ },
238
+ decimal: {
239
+ minArgs: 0,
240
+ maxArgs: 3,
241
+ description: 'Generate random decimal (min, max, fixed decimal places)'
242
+ },
243
+
227
244
  // Special function for arrow functions
228
245
  arrow: {
229
246
  minArgs: 1,
@@ -2,7 +2,6 @@ import fs from 'fs';
2
2
  import Chance from 'chance';
3
3
  import readline from 'readline';
4
4
  import { comma, uid } from 'ak-tools';
5
- import { spawn } from 'child_process';
6
5
  import dayjs from 'dayjs';
7
6
  import utc from 'dayjs/plugin/utc.js';
8
7
  import path from 'path';
@@ -474,7 +473,7 @@ function companyName(words = 2, separator = " ") {
474
473
  const cycle = [industryAdjectives, companyNouns];
475
474
  for (let i = 0; i < words; i++) {
476
475
  const index = i % cycle.length;
477
- const word = cycle[index][Math.floor(Math.random() * cycle[index].length)];
476
+ const word = cycle[index][getChance().integer({ min: 0, max: cycle[index].length - 1 })];
478
477
  if (name === "") {
479
478
  name = word;
480
479
  } else {
@@ -873,9 +872,8 @@ function shuffleArray(array) {
873
872
 
874
873
  function pickRandom(array) {
875
874
  if (!array || array.length === 0) return undefined;
876
- // PERFORMANCE: Use Math.random() instead of chance.integer() for simple cases
877
- const randomIndex = Math.floor(Math.random() * array.length);
878
- return array[randomIndex];
875
+ const chance = getChance();
876
+ return chance.pickone(array);
879
877
  }
880
878
 
881
879
  function shuffleExceptFirst(array) {
@@ -1129,15 +1127,6 @@ function progress(arrayOfArrays) {
1129
1127
  process.stdout.write(finalMessage);
1130
1128
  }
1131
1129
 
1132
- function openFinder(path, callback) {
1133
- path = path || '/';
1134
- let p = spawn('open', [path]);
1135
- p.on('error', (err) => {
1136
- p.kill();
1137
- return callback(err);
1138
- });
1139
- };
1140
-
1141
1130
  function getUniqueKeys(data) {
1142
1131
  const keysSet = new Set();
1143
1132
  data.forEach(item => {
@@ -1186,57 +1175,20 @@ let soupHits = 0;
1186
1175
  * @param {number} [latestTime]
1187
1176
  * @param {number} [peaks=5]
1188
1177
  */
1178
+ /**
1179
+ * Generates a timestamp within a time range using clustered normal distributions.
1180
+ * Divides the range into `peaks` chunks, picks one randomly, then samples within it.
1181
+ * Returns unix seconds (not ISO string) for performance — caller converts once.
1182
+ */
1189
1183
  function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0) {
1190
- if (!earliestTime) earliestTime = global.FIXED_BEGIN ? global.FIXED_BEGIN : dayjs().subtract(30, 'd').unix(); // 30 days ago
1191
- if (!latestTime) latestTime = global.FIXED_NOW ? global.FIXED_NOW : dayjs().unix();
1192
- const chance = getChance();
1193
- const totalRange = latestTime - earliestTime;
1194
- const chunkSize = totalRange / peaks;
1195
-
1196
- // Select a random chunk based on the number of peaks
1197
- const peakIndex = integer(0, peaks - 1);
1198
- const chunkStart = earliestTime + peakIndex * chunkSize;
1199
- const chunkEnd = chunkStart + chunkSize;
1200
- const chunkMid = (chunkStart + chunkEnd) / 2;
1201
-
1202
- // Generate a single timestamp within this chunk using a normal distribution centered at chunkMid
1203
- let offset;
1204
- let iterations = 0;
1205
- let isValidTime = false;
1206
- do {
1207
- iterations++;
1208
- soupHits++;
1209
- offset = chance.normal({ mean: mean, dev: chunkSize / deviation });
1210
- isValidTime = validTime(chunkMid + offset, earliestTime, latestTime);
1211
- if (iterations > 25000) {
1212
- throw `${iterations} iterations... exceeded`;
1213
- }
1214
- } while (chunkMid + offset < chunkStart || chunkMid + offset > chunkEnd);
1215
-
1216
- try {
1217
- return dayjs.unix(chunkMid + offset).toISOString();
1218
- }
1219
-
1220
- catch (e) {
1221
- //escape hatch
1222
- // console.log('BAD TIME', e?.message);
1223
- if (NODE_ENV === 'dev') debugger;
1224
- return dayjs.unix(integer(earliestTime, latestTime)).toISOString();
1225
- }
1226
- }
1227
-
1228
-
1229
- function NewTimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0) {
1230
- if (!earliestTime) earliestTime = global.FIXED_BEGIN ? global.FIXED_BEGIN : dayjs().subtract(30, 'd').unix(); // 30 days ago
1184
+ if (!earliestTime) earliestTime = global.FIXED_BEGIN ? global.FIXED_BEGIN : dayjs().subtract(30, 'd').unix();
1231
1185
  if (!latestTime) latestTime = global.FIXED_NOW ? global.FIXED_NOW : dayjs().unix();
1232
1186
  const chance = getChance();
1233
1187
  let totalRange = latestTime - earliestTime;
1234
1188
  if (totalRange <= 0 || earliestTime > latestTime) {
1235
- //just flip earliest and latest
1236
- let tempEarly = latestTime;
1237
- let tempLate = earliestTime;
1238
- earliestTime = tempEarly;
1239
- latestTime = tempLate;
1189
+ const temp = latestTime;
1190
+ latestTime = earliestTime;
1191
+ earliestTime = temp;
1240
1192
  totalRange = latestTime - earliestTime;
1241
1193
  }
1242
1194
  const chunkSize = totalRange / peaks;
@@ -1247,29 +1199,15 @@ function NewTimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean =
1247
1199
  const chunkEnd = chunkStart + chunkSize;
1248
1200
  const chunkMid = (chunkStart + chunkEnd) / 2;
1249
1201
 
1250
- // Optimized timestamp generation - clamp to valid range instead of looping
1202
+ // Generate offset from normal distribution, clamp to chunk boundaries
1251
1203
  const maxDeviation = chunkSize / deviation;
1252
- let offset = chance.normal({ mean: mean, dev: maxDeviation });
1253
-
1254
- // Clamp to chunk boundaries to prevent infinite loops
1204
+ const offset = chance.normal({ mean: mean, dev: maxDeviation });
1255
1205
  const proposedTime = chunkMid + offset;
1256
1206
  const clampedTime = Math.max(chunkStart, Math.min(chunkEnd, proposedTime));
1257
-
1258
- // Ensure it's within the overall valid range
1259
1207
  const finalTime = Math.max(earliestTime, Math.min(latestTime, clampedTime));
1260
1208
 
1261
- // Update soup hits counter (keep for compatibility)
1262
1209
  soupHits++;
1263
-
1264
- try {
1265
- return dayjs.unix(finalTime).toISOString();
1266
- }
1267
-
1268
- catch (e) {
1269
- //escape hatch
1270
- // console.log('BAD TIME', e?.message);
1271
- return dayjs.unix(integer(earliestTime, latestTime)).toISOString();
1272
- }
1210
+ return finalTime;
1273
1211
  }
1274
1212
 
1275
1213
 
@@ -1406,11 +1344,6 @@ function wrapFunc(obj, func, recursion = 0, parentKey = null, grandParentKey = n
1406
1344
 
1407
1345
  // }
1408
1346
 
1409
- const chance = getChance();
1410
- function odds(num) {
1411
- return chance.bool({ likelihood: num });
1412
- }
1413
-
1414
1347
  /**
1415
1348
  * makes a random-sized array of emojis
1416
1349
  * @param {number} max=10
@@ -1505,7 +1438,6 @@ export {
1505
1438
  initChance,
1506
1439
  getChance,
1507
1440
  decimal,
1508
- odds,
1509
1441
  validTime,
1510
1442
  validEvent,
1511
1443
 
@@ -1515,7 +1447,6 @@ export {
1515
1447
  weighNumRange,
1516
1448
  progress,
1517
1449
  range,
1518
- openFinder,
1519
1450
  getUniqueKeys,
1520
1451
  person,
1521
1452
  pickAWinner,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "make-mp-data",
3
- "version": "3.0.2",
3
+ "version": "3.0.4",
4
4
  "description": "builds all mixpanel primitives for a given project",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -18,6 +18,7 @@
18
18
  "scripts": {
19
19
  "post": "npm publish",
20
20
  "test": "NODE_ENV=test vitest run",
21
+ "test:coverage": "NODE_ENV=test vitest run --coverage",
21
22
  "typecheck": "tsc --noEmit",
22
23
  "dev": "nodemon scratch.mjs --ignore ./data/*",
23
24
  "prune": "rm -f ./data/* && rm -f ./tmp/* && rm -f vscode-profile-*",
@@ -69,6 +70,7 @@
69
70
  "yargs": "^17.7.2"
70
71
  },
71
72
  "devDependencies": {
73
+ "@vitest/coverage-v8": "^2.1.9",
72
74
  "@vitest/ui": "^2.1.9",
73
75
  "nodemon": "^3.1.3",
74
76
  "typescript": "^5.6.0",