make-mp-data 2.0.21 → 2.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,52 +19,52 @@ import os from "os";
19
19
  * @returns {Funnel[]} Array of inferred funnel configurations
20
20
  */
21
21
  function inferFunnels(events) {
22
- const createdFunnels = [];
23
- const firstEvents = events.filter((e) => e.isFirstEvent).map((e) => e.event);
24
- const usageEvents = events.filter((e) => !e.isFirstEvent).map((e) => e.event);
25
- const numFunnelsToCreate = Math.ceil(usageEvents.length);
26
-
27
- /** @type {import('../../types.js').Funnel} */
28
- const funnelTemplate = {
29
- sequence: [],
30
- conversionRate: 50,
31
- order: 'sequential',
32
- requireRepeats: false,
33
- props: {},
34
- timeToConvert: 1,
35
- isFirstFunnel: false,
36
- weight: 1
37
- };
38
-
39
- // Create funnels for first events
40
- if (firstEvents.length) {
41
- for (const event of firstEvents) {
42
- createdFunnels.push({
43
- ...u.deepClone(funnelTemplate),
44
- sequence: [event],
45
- isFirstFunnel: true,
46
- conversionRate: 100
47
- });
48
- }
49
- }
50
-
51
- // At least one funnel with all usage events
52
- createdFunnels.push({ ...u.deepClone(funnelTemplate), sequence: usageEvents });
53
-
54
- // Create random funnels for the rest
55
- for (let i = 1; i < numFunnelsToCreate; i++) {
56
- /** @type {import('../../types.js').Funnel} */
57
- const funnel = { ...u.deepClone(funnelTemplate) };
58
- funnel.conversionRate = u.integer(25, 75);
59
- funnel.timeToConvert = u.integer(1, 10);
60
- funnel.weight = u.integer(1, 10);
61
- const sequence = u.shuffleArray(usageEvents).slice(0, u.integer(2, usageEvents.length));
62
- funnel.sequence = sequence;
63
- funnel.order = 'random';
64
- createdFunnels.push(funnel);
65
- }
66
-
67
- return createdFunnels;
22
+ const createdFunnels = [];
23
+ const firstEvents = events.filter((e) => e.isFirstEvent).map((e) => e.event);
24
+ const usageEvents = events.filter((e) => !e.isFirstEvent).map((e) => e.event);
25
+ const numFunnelsToCreate = Math.ceil(usageEvents.length);
26
+
27
+ /** @type {import('../../types.js').Funnel} */
28
+ const funnelTemplate = {
29
+ sequence: [],
30
+ conversionRate: 50,
31
+ order: 'sequential',
32
+ requireRepeats: false,
33
+ props: {},
34
+ timeToConvert: 1,
35
+ isFirstFunnel: false,
36
+ weight: 1
37
+ };
38
+
39
+ // Create funnels for first events
40
+ if (firstEvents.length) {
41
+ for (const event of firstEvents) {
42
+ createdFunnels.push({
43
+ ...u.deepClone(funnelTemplate),
44
+ sequence: [event],
45
+ isFirstFunnel: true,
46
+ conversionRate: 100
47
+ });
48
+ }
49
+ }
50
+
51
+ // At least one funnel with all usage events
52
+ createdFunnels.push({ ...u.deepClone(funnelTemplate), sequence: usageEvents });
53
+
54
+ // Create random funnels for the rest
55
+ for (let i = 1; i < numFunnelsToCreate; i++) {
56
+ /** @type {import('../../types.js').Funnel} */
57
+ const funnel = { ...u.deepClone(funnelTemplate) };
58
+ funnel.conversionRate = u.integer(25, 75);
59
+ funnel.timeToConvert = u.integer(1, 10);
60
+ funnel.weight = u.integer(1, 10);
61
+ const sequence = u.shuffleArray(usageEvents).slice(0, u.integer(2, usageEvents.length));
62
+ funnel.sequence = sequence;
63
+ funnel.order = 'random';
64
+ createdFunnels.push(funnel);
65
+ }
66
+
67
+ return createdFunnels;
68
68
  }
69
69
 
70
70
  /**
@@ -73,169 +73,180 @@ function inferFunnels(events) {
73
73
  * @returns {Dungeon} Validated and enriched configuration
74
74
  */
75
75
  export function validateDungeonConfig(config) {
76
- const chance = u.getChance();
77
-
78
- // Extract configuration with defaults
79
- let {
80
- seed,
81
- numEvents = 100_000,
82
- numUsers = 1000,
83
- numDays = 30,
84
- epochStart = 0,
85
- epochEnd = dayjs().unix(),
86
- events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }],
87
- superProps = { luckyNumber: [2, 2, 4, 4, 42, 42, 42, 2, 2, 4, 4, 42, 42, 42, 420] },
88
- funnels = [],
89
- userProps = {
90
- spiritAnimal: chance.animal.bind(chance),
91
- },
92
- scdProps = {},
93
- mirrorProps = {},
94
- groupKeys = [],
95
- groupProps = {},
96
- lookupTables = [],
97
- hasAnonIds = false,
98
- hasSessionIds = false,
99
- format = "csv",
100
- token = null,
101
- region = "US",
102
- writeToDisk = false,
103
- verbose = true,
104
- makeChart = false,
105
- soup = {},
106
- hook = (record) => record,
107
- hasAdSpend = false,
108
- hasCampaigns = false,
109
- hasLocation = false,
110
- hasAvatar = false,
111
- isAnonymous = false,
112
- hasBrowser = false,
113
- hasAndroidDevices = false,
114
- hasDesktopDevices = false,
115
- hasIOSDevices = false,
116
- alsoInferFunnels = false,
117
- name = "",
118
- batchSize = 500_000,
119
- concurrency
120
- } = config;
121
-
122
- // Set concurrency default only if not provided
123
- if (concurrency === undefined || concurrency === null) {
124
- concurrency = Math.min(os.cpus().length * 2, 16);
125
- }
126
-
127
- // Ensure defaults for deep objects
128
- if (!config.superProps) config.superProps = superProps;
129
- if (!config.userProps || Object.keys(config?.userProps || {})) config.userProps = userProps;
130
-
131
- // Setting up "TIME"
132
- if (epochStart && !numDays) numDays = dayjs.unix(epochEnd).diff(dayjs.unix(epochStart), "day");
133
- if (!epochStart && numDays) epochStart = dayjs.unix(epochEnd).subtract(numDays, "day").unix();
134
- if (epochStart && numDays) { } // noop
135
- if (!epochStart && !numDays) {
136
- throw new Error("Either epochStart or numDays must be provided");
137
- }
138
-
139
- // Generate simulation name
140
- config.simulationName = name || makeName();
141
- config.name = config.simulationName;
142
-
143
- // Validate events
144
- if (!events || !events.length) events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }];
145
-
146
- // Convert string events to objects
147
- if (typeof events[0] === "string") {
148
- events = events.map(e => ({ event: /** @type {string} */ (e) }));
149
- }
150
-
151
- // Handle funnel inference
152
- if (alsoInferFunnels) {
153
- const inferredFunnels = inferFunnels(events);
154
- funnels = [...funnels, ...inferredFunnels];
155
- }
156
-
157
- // Create funnel for events not in other funnels
158
- const eventContainedInFunnels = Array.from(funnels.reduce((acc, f) => {
159
- const events = f.sequence;
160
- events.forEach(event => acc.add(event));
161
- return acc;
162
- }, new Set()));
163
-
164
- const eventsNotInFunnels = events
165
- .filter(e => !e.isFirstEvent)
166
- .filter(e => !eventContainedInFunnels.includes(e.event))
167
- .map(e => e.event);
168
-
169
- if (eventsNotInFunnels.length) {
170
- const sequence = u.shuffleArray(eventsNotInFunnels.flatMap(event => {
171
- let evWeight;
172
- // First check the config
173
- if (config.events) {
174
- evWeight = config.events.find(e => e.event === event)?.weight || 1;
175
- }
176
- // Fallback on default
177
- else {
178
- evWeight = 1;
179
- }
180
- return Array(evWeight).fill(event);
181
- }));
182
-
183
- funnels.push({
184
- sequence,
185
- conversionRate: 50,
186
- order: 'random',
187
- timeToConvert: 24 * 14,
188
- requireRepeats: false,
189
- });
190
- }
191
-
192
- // Event validation
193
- const validatedEvents = u.validateEventConfig(events);
194
-
195
- // Build final config object
196
- const validatedConfig = {
197
- ...config,
198
- concurrency,
199
- funnels,
200
- batchSize,
201
- seed,
202
- numEvents,
203
- numUsers,
204
- numDays,
205
- epochStart,
206
- epochEnd,
207
- events: validatedEvents,
208
- superProps,
209
- userProps,
210
- scdProps,
211
- mirrorProps,
212
- groupKeys,
213
- groupProps,
214
- lookupTables,
215
- hasAnonIds,
216
- hasSessionIds,
217
- format,
218
- token,
219
- region,
220
- writeToDisk,
221
- verbose,
222
- makeChart,
223
- soup,
224
- hook,
225
- hasAdSpend,
226
- hasCampaigns,
227
- hasLocation,
228
- hasAvatar,
229
- isAnonymous,
230
- hasBrowser,
231
- hasAndroidDevices,
232
- hasDesktopDevices,
233
- hasIOSDevices,
234
- simulationName: config.simulationName,
235
- name: config.name
236
- };
237
-
238
- return validatedConfig;
76
+ const chance = u.getChance();
77
+
78
+ // Extract configuration with defaults
79
+ let {
80
+ seed,
81
+ numEvents = 100_000,
82
+ numUsers = 1000,
83
+ numDays = 30,
84
+ epochStart = 0,
85
+ epochEnd = dayjs().unix(),
86
+ events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }],
87
+ superProps = { luckyNumber: [2, 2, 4, 4, 42, 42, 42, 2, 2, 4, 4, 42, 42, 42, 420] },
88
+ funnels = [],
89
+ userProps = {
90
+ spiritAnimal: chance.animal.bind(chance),
91
+ },
92
+ scdProps = {},
93
+ mirrorProps = {},
94
+ groupKeys = [],
95
+ groupProps = {},
96
+ lookupTables = [],
97
+ hasAnonIds = false,
98
+ hasSessionIds = false,
99
+ format = "csv",
100
+ token = null,
101
+ region = "US",
102
+ writeToDisk = false,
103
+ verbose = true,
104
+ makeChart = false,
105
+ soup = {},
106
+ hook = (record) => record,
107
+ hasAdSpend = false,
108
+ hasCampaigns = false,
109
+ hasLocation = false,
110
+ hasAvatar = false,
111
+ isAnonymous = false,
112
+ hasBrowser = false,
113
+ hasAndroidDevices = false,
114
+ hasDesktopDevices = false,
115
+ hasIOSDevices = false,
116
+ alsoInferFunnels = false,
117
+ name = "",
118
+ batchSize = 500_000,
119
+ concurrency
120
+ } = config;
121
+
122
+ // Set concurrency default only if not provided
123
+ if (concurrency === undefined || concurrency === null) {
124
+ concurrency = Math.min(os.cpus().length * 2, 16);
125
+ }
126
+
127
+ // Ensure defaults for deep objects
128
+ if (!config.superProps) config.superProps = superProps;
129
+ if (!config.userProps || Object.keys(config?.userProps || {})) config.userProps = userProps;
130
+
131
+ // Setting up "TIME"
132
+ if (epochStart && !numDays) numDays = dayjs.unix(epochEnd).diff(dayjs.unix(epochStart), "day");
133
+ if (!epochStart && numDays) epochStart = dayjs.unix(epochEnd).subtract(numDays, "day").unix();
134
+ if (epochStart && numDays) { } // noop
135
+ if (!epochStart && !numDays) {
136
+ throw new Error("Either epochStart or numDays must be provided");
137
+ }
138
+
139
+ // Generate simulation name
140
+ config.simulationName = name || makeName();
141
+ config.name = config.simulationName;
142
+
143
+ // Validate events
144
+ if (!events || !events.length) events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }];
145
+
146
+ // Convert string events to objects
147
+ if (typeof events[0] === "string") {
148
+ events = events.map(e => ({ event: /** @type {string} */ (e) }));
149
+ }
150
+
151
+ // Handle funnel inference
152
+ if (alsoInferFunnels) {
153
+ const inferredFunnels = inferFunnels(events);
154
+ funnels = [...funnels, ...inferredFunnels];
155
+ }
156
+
157
+ // Create funnel for events not in other funnels
158
+ const eventContainedInFunnels = Array.from(funnels.reduce((acc, f) => {
159
+ const events = f.sequence;
160
+ events.forEach(event => acc.add(event));
161
+ return acc;
162
+ }, new Set()));
163
+
164
+ const eventsNotInFunnels = events
165
+ .filter(e => !e.isFirstEvent)
166
+ .filter(e => !eventContainedInFunnels.includes(e.event))
167
+ .map(e => e.event);
168
+
169
+ if (eventsNotInFunnels.length) {
170
+ const sequence = u.shuffleArray(eventsNotInFunnels.flatMap(event => {
171
+ let evWeight;
172
+ // First check the config
173
+ if (config.events) {
174
+ evWeight = config.events.find(e => e.event === event)?.weight || 1;
175
+ }
176
+ // Fallback on default
177
+ else {
178
+ evWeight = 1;
179
+ }
180
+ return Array(evWeight).fill(event);
181
+ }));
182
+
183
+ funnels.push({
184
+ sequence,
185
+ conversionRate: 50,
186
+ order: 'random',
187
+ timeToConvert: 24 * 14,
188
+ requireRepeats: false,
189
+ });
190
+ }
191
+
192
+ // ensure every event in funnel sequence exists in our eventConfig
193
+ const eventInFunnels = Array.from(new Set(funnels.map(funnel => funnel.sequence).flat()));
194
+
195
+ const definedEvents = events.map(e => e.event);
196
+ const missingEvents = eventInFunnels.filter(event => !definedEvents.includes(event));
197
+ if (missingEvents.length) {
198
+ throw new Error(`Funnel sequences contain events that are not defined in the events config:\n${missingEvents.join(', ')}\nPlease ensure all events in funnel sequences are defined in the events array.`);
199
+ }
200
+
201
+
202
+
203
+ // Event validation
204
+ const validatedEvents = u.validateEventConfig(events);
205
+
206
+ // Build final config object
207
+ const validatedConfig = {
208
+ ...config,
209
+ concurrency,
210
+ funnels,
211
+ batchSize,
212
+ seed,
213
+ numEvents,
214
+ numUsers,
215
+ numDays,
216
+ epochStart,
217
+ epochEnd,
218
+ events: validatedEvents,
219
+ superProps,
220
+ userProps,
221
+ scdProps,
222
+ mirrorProps,
223
+ groupKeys,
224
+ groupProps,
225
+ lookupTables,
226
+ hasAnonIds,
227
+ hasSessionIds,
228
+ format,
229
+ token,
230
+ region,
231
+ writeToDisk,
232
+ verbose,
233
+ makeChart,
234
+ soup,
235
+ hook,
236
+ hasAdSpend,
237
+ hasCampaigns,
238
+ hasLocation,
239
+ hasAvatar,
240
+ isAnonymous,
241
+ hasBrowser,
242
+ hasAndroidDevices,
243
+ hasDesktopDevices,
244
+ hasIOSDevices,
245
+ simulationName: config.simulationName,
246
+ name: config.name
247
+ };
248
+
249
+ return validatedConfig;
239
250
  }
240
251
 
241
252
  /**
@@ -249,16 +260,16 @@ export function validateDungeonConfig(config) {
249
260
  * @returns {boolean} True if validation passes
250
261
  */
251
262
  export function validateRequiredConfig(config) {
252
- if (!config) {
253
- throw new Error("Configuration is required");
254
- }
255
-
256
- if (typeof config !== 'object') {
257
- throw new Error("Configuration must be an object");
258
- }
259
-
260
- // Could add more specific validation here
261
- return true;
263
+ if (!config) {
264
+ throw new Error("Configuration is required");
265
+ }
266
+
267
+ if (typeof config !== 'object') {
268
+ throw new Error("Configuration must be an object");
269
+ }
270
+
271
+ // Could add more specific validation here
272
+ return true;
262
273
  }
263
274
 
264
275
  export { inferFunnels };
@@ -32,13 +32,24 @@ export async function createHookArray(arr = [], opts) {
32
32
  } = opts || {};
33
33
 
34
34
  const FILE_CONN = pLimit(concurrency);
35
- const { config = {}, runtime = {} } = context;
35
+ const {
36
+ config = {},
37
+ runtime = {
38
+ operations: 0,
39
+ eventCount: 0,
40
+ userCount: 0,
41
+ isBatchMode: false,
42
+ verbose: false,
43
+ isCLI: false
44
+ }
45
+ } = context;
36
46
  const BATCH_SIZE = config.batchSize || 1_000_000;
37
47
  const NODE_ENV = process.env.NODE_ENV || "unknown";
38
48
 
39
49
  let batch = 0;
40
50
  let writeDir;
41
51
  let isBatchMode = runtime.isBatchMode || false;
52
+ let isWriting = false; // Prevent concurrent writes
42
53
 
43
54
  // Determine write directory
44
55
  const dataFolder = path.resolve("./data");
@@ -109,15 +120,26 @@ export async function createHookArray(arr = [], opts) {
109
120
  }
110
121
  }
111
122
 
112
- if (arr.length > BATCH_SIZE) {
123
+ // Check batch size and handle writes synchronously to prevent race conditions
124
+ if (arr.length > BATCH_SIZE && !isWriting) {
125
+ isWriting = true; // Lock to prevent concurrent writes
113
126
  isBatchMode = true;
114
127
  runtime.isBatchMode = true; // Update runtime state
115
128
  batch++;
116
129
  const writePath = getWritePath();
117
- const writeResult = await FILE_CONN(() => writeToDisk(arr, { writePath }));
118
- // Ensure array is cleared after successful write
119
- arr.length = 0;
120
- return writeResult;
130
+
131
+ try {
132
+ // Create a copy of the data to write
133
+ const dataToWrite = [...arr];
134
+ // Clear the array immediately to prevent race conditions
135
+ arr.length = 0;
136
+
137
+ // Write to disk (this is now synchronous from the perspective of batch management)
138
+ const writeResult = await FILE_CONN(() => writeToDisk(dataToWrite, { writePath }));
139
+ return writeResult;
140
+ } finally {
141
+ isWriting = false; // Release the lock
142
+ }
121
143
  } else {
122
144
  return Promise.resolve(false);
123
145
  }
@@ -148,10 +170,21 @@ export async function createHookArray(arr = [], opts) {
148
170
 
149
171
  async function flush() {
150
172
  if (arr.length > 0) {
151
- batch++;
152
- const writePath = getWritePath();
153
- await FILE_CONN(() => writeToDisk(arr, { writePath }));
154
- if (isBatchMode) arr.length = 0; // free up memory for batch mode
173
+ // Wait for any ongoing writes to complete
174
+ while (isWriting) {
175
+ await new Promise(resolve => setTimeout(resolve, 10));
176
+ }
177
+
178
+ isWriting = true;
179
+ try {
180
+ batch++;
181
+ const writePath = getWritePath();
182
+ const dataToWrite = [...arr];
183
+ arr.length = 0; // Clear array after copying data
184
+ await FILE_CONN(() => writeToDisk(dataToWrite, { writePath }));
185
+ } finally {
186
+ isWriting = false;
187
+ }
155
188
  }
156
189
  }
157
190
 
@@ -58,9 +58,10 @@ export async function sendToMixpanel(context) {
58
58
  epochEnd: dayjs().unix(),
59
59
  dryRun: false,
60
60
  abridged: false,
61
- fixJson: true,
61
+ fixJson: false,
62
62
  showProgress: NODE_ENV === "dev" ? true : false,
63
- streamFormat: mpImportFormat
63
+ streamFormat: mpImportFormat,
64
+ workers: 35
64
65
  };
65
66
 
66
67
  if (isCLI) commonOpts.showProgress = true;
@@ -134,6 +135,8 @@ export async function sendToMixpanel(context) {
134
135
  const imported = await mp({ token, groupKey }, groupProfilesToImport, {
135
136
  recordType: "group",
136
137
  ...commonOpts,
138
+ groupKey,
139
+ //dryRun: true
137
140
  });
138
141
  log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);
139
142
  importResults.groups.push(imported);