make-mp-data 2.0.17 → 2.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,7 +36,7 @@ const config = {
36
36
  makeChart: false,
37
37
 
38
38
  batchSize: 500_000,
39
- concurrency: 500,
39
+ concurrency: 10,
40
40
 
41
41
  funnels: [],
42
42
  events: [
@@ -29,6 +29,7 @@ const config = {
29
29
  hasSessionIds: false, //if true, hasSessionIds are created for each user
30
30
  alsoInferFunnels: true, //if true, infer funnels from events
31
31
  makeChart: true,
32
+ concurrency: 10,
32
33
  funnels: [
33
34
  {
34
35
  sequence: ["qux", "garply", "durtle", "linny", "fonk", "crumn", "yak"],
@@ -40,6 +40,7 @@ const config = {
40
40
  hasBrowser: true,
41
41
  hasCampaigns: true,
42
42
  isAnonymous: false,
43
+ concurrency: 10,
43
44
 
44
45
 
45
46
  events: [
@@ -118,8 +119,8 @@ const config = {
118
119
  }
119
120
  }
120
121
  ],
121
- superProps: {
122
- currentTheme: weighChoices(["light", "dark", "custom", "light", "dark"]),
122
+ superProps: {
123
+ theme: pickAWinner(["light", "dark", "custom", "light", "dark"]),
123
124
  },
124
125
  /*
125
126
  user properties work the same as event properties
@@ -141,10 +142,53 @@ const config = {
141
142
  groupProps: {},
142
143
  lookupTables: [],
143
144
  hook: function (record, type, meta) {
145
+ if (type === "everything") {
146
+
147
+ //custom themers purchase more:
148
+ const numCustomMode = record.filter(a => a.theme === 'custom').length;
149
+ const numLightMode = record.filter(a => a.theme === 'light').length;
150
+ const numDarkMode = record.filter(a => a.theme === 'dark').length;
151
+ if (numCustomMode > numLightMode || numCustomMode > numDarkMode) {
152
+ //triple their checkout events
153
+ const checkoutEvents = record.filter(a => a.event === 'checkout');
154
+ const newCheckouts = checkoutEvents.map(a => {
155
+ const randomInt = integer(-48, 48);
156
+ const newCheckout = {
157
+ ...a,
158
+ time: dayjs(a.time).add(randomInt, 'hour').toISOString(),
159
+ event: "checkout",
160
+ amount: a.amount * 2,
161
+ coupon: "50%OFF"
162
+ };
163
+ return newCheckout;
164
+ });
165
+ record.push(...newCheckouts);
166
+ }
167
+
168
+ //users who watch low quality videos churn more:
169
+ const loQuality = ["480p", "360p", "240p"];
170
+ const lowQualityWatches = record.filter(a => a.event === 'watch video' && loQuality.includes(a.quality));
171
+ const highQualityWatches = record.filter(a => a.event === 'watch video' && !loQuality.includes(a.quality));
172
+ if (lowQualityWatches.length > highQualityWatches.length) {
173
+ if (flip()) {
174
+ // find midpoint of records
175
+ const midpoint = Math.floor(record.length / 2);
176
+ record = record.slice(0, midpoint);
177
+
178
+ }
179
+ }
180
+
181
+ }
182
+
183
+
184
+
144
185
  return record;
145
186
  }
146
187
  };
147
188
 
189
+ function flip(likelihood = 50) {
190
+ return chance.bool({ likelihood });
191
+ }
148
192
 
149
193
 
150
194
  export default config;
@@ -22,12 +22,13 @@ import getCliParams from './lib/cli/cli.js';
22
22
  const simpleConfig = await import('./dungeons/simple.js');
23
23
  finalConfig = { ...simpleConfig.default, ...cliConfig };
24
24
  }
25
-
26
- const result = await main(finalConfig);
27
-
25
+
26
+
27
+ const result = await main(finalConfig);
28
28
  console.log(`📊 Generated ${(result.eventCount || 0).toLocaleString()} events for ${(result.userCount || 0).toLocaleString()} users`);
29
29
  console.log(`⏱️ Total time: ${result.time?.human || 'unknown'}`);
30
- if (result.files?.length) {
30
+
31
+ if (result.files?.length) {
31
32
  console.log(`📁 Files written: ${result.files.length}`);
32
33
  if (cliConfig.verbose) {
33
34
  result.files.forEach(file => console.log(` ${file}`));
package/index.js CHANGED
@@ -60,7 +60,7 @@ async function main(config) {
60
60
  //cli mode check for positional dungeon config
61
61
  const isCLI = config._ && Array.isArray(config._);
62
62
  if (isCLI) {
63
- const firstArg = config._.slice().pop()
63
+ const firstArg = config._.slice().pop();
64
64
  if (firstArg?.endsWith('.js') && existsSync(firstArg)) {
65
65
  if (config.verbose) {
66
66
  console.log(`\n🔍 Loading dungeon config from: ${firstArg}`);
@@ -73,9 +73,10 @@ async function main(config) {
73
73
  throw error;
74
74
  }
75
75
  }
76
-
76
+
77
77
  }
78
78
 
79
+ if (config.verbose) console.log(`\n🔧 Configuring dungeon with seed: ${config.seed}`);
79
80
  let validatedConfig;
80
81
  try {
81
82
  // Step 1: Validate and enrich configuration
@@ -94,6 +95,7 @@ async function main(config) {
94
95
  await generateAdSpendData(context);
95
96
  }
96
97
 
98
+ if (context.config.verbose) console.log(`\n🔄 Starting user and event generation...\n`);
97
99
  // Step 5: Main user and event generation
98
100
  await userLoop(context);
99
101
 
package/lib/cli/cli.js CHANGED
@@ -100,7 +100,7 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
100
100
  })
101
101
  .option('concurrency', {
102
102
  alias: 'conn',
103
- default: 500,
103
+ default: 10,
104
104
  demandOption: false,
105
105
  describe: 'concurrency level for data generation',
106
106
  type: 'number'
@@ -4,8 +4,9 @@
4
4
  */
5
5
 
6
6
  import dayjs from "dayjs";
7
- import { makeName, clone } from "ak-tools";
7
+ import { makeName } from "ak-tools";
8
8
  import * as u from "../utils/utils.js";
9
+ import os from "os";
9
10
 
10
11
  /**
11
12
  * Infers funnels from the provided events
@@ -34,7 +35,7 @@ function inferFunnels(events) {
34
35
  if (firstEvents.length) {
35
36
  for (const event of firstEvents) {
36
37
  createdFunnels.push({
37
- ...clone(funnelTemplate),
38
+ ...u.deepClone(funnelTemplate),
38
39
  sequence: [event],
39
40
  isFirstFunnel: true,
40
41
  conversionRate: 100
@@ -43,12 +44,12 @@ function inferFunnels(events) {
43
44
  }
44
45
 
45
46
  // At least one funnel with all usage events
46
- createdFunnels.push({ ...clone(funnelTemplate), sequence: usageEvents });
47
+ createdFunnels.push({ ...u.deepClone(funnelTemplate), sequence: usageEvents });
47
48
 
48
49
  // Create random funnels for the rest
49
50
  for (let i = 1; i < numFunnelsToCreate; i++) {
50
51
  /** @type {import('../../types.js').Funnel} */
51
- const funnel = { ...clone(funnelTemplate) };
52
+ const funnel = { ...u.deepClone(funnelTemplate) };
52
53
  funnel.conversionRate = u.integer(25, 75);
53
54
  funnel.timeToConvert = u.integer(1, 10);
54
55
  funnel.weight = u.integer(1, 10);
@@ -94,7 +95,7 @@ export function validateDungeonConfig(config) {
94
95
  token = null,
95
96
  region = "US",
96
97
  writeToDisk = false,
97
- verbose = false,
98
+ verbose = true,
98
99
  makeChart = false,
99
100
  soup = {},
100
101
  hook = (record) => record,
@@ -110,7 +111,7 @@ export function validateDungeonConfig(config) {
110
111
  alsoInferFunnels = false,
111
112
  name = "",
112
113
  batchSize = 500_000,
113
- concurrency = 500
114
+ concurrency = Math.min(os.cpus().length * 2, 16) // Default to 2x CPU cores, max 16
114
115
  } = config;
115
116
 
116
117
  // Ensure defaults for deep objects
@@ -20,26 +20,26 @@ import * as u from '../utils/utils.js';
20
20
  * @returns {Defaults} Defaults object with factory functions
21
21
  */
22
22
  function createDefaults(config, campaignData) {
23
- const { singleCountry } = config;
24
-
25
- // Pre-compute weighted arrays based on configuration
26
- const locationsUsers = singleCountry ?
27
- locations.filter(l => l.country === singleCountry) :
28
- locations;
29
-
30
- const locationsEvents = singleCountry ?
31
- locations.filter(l => l.country === singleCountry) :
32
- locations;
33
-
34
- return {
35
- locationsUsers: () => u.weighArray(locationsUsers),
36
- locationsEvents: () => u.weighArray(locationsEvents),
37
- iOSDevices: () => u.weighArray(devices.iosDevices),
38
- androidDevices: () => u.weighArray(devices.androidDevices),
39
- desktopDevices: () => u.weighArray(devices.desktopDevices),
40
- browsers: () => u.weighArray(devices.browsers),
41
- campaigns: () => u.weighArray(campaignData)
42
- };
23
+ const { singleCountry } = config;
24
+
25
+ // Pre-compute weighted arrays based on configuration
26
+ const locationsUsers = singleCountry ?
27
+ locations.filter(l => l.country === singleCountry) :
28
+ locations;
29
+
30
+ const locationsEvents = singleCountry ?
31
+ locations.filter(l => l.country === singleCountry) :
32
+ locations;
33
+
34
+ return {
35
+ locationsUsers: () => u.weighArray(locationsUsers),
36
+ locationsEvents: () => u.weighArray(locationsEvents),
37
+ iOSDevices: () => u.weighArray(devices.iosDevices),
38
+ androidDevices: () => u.weighArray(devices.androidDevices),
39
+ desktopDevices: () => u.weighArray(devices.desktopDevices),
40
+ browsers: () => u.weighArray(devices.browsers),
41
+ campaigns: () => u.weighArray(campaignData)
42
+ };
43
43
  }
44
44
 
45
45
  /**
@@ -47,14 +47,14 @@ function createDefaults(config, campaignData) {
47
47
  * @returns {RuntimeState} Runtime state with counters and flags
48
48
  */
49
49
  function createRuntimeState() {
50
- return {
51
- operations: 0,
52
- eventCount: 0,
53
- userCount: 0,
54
- isBatchMode: false,
55
- verbose: false,
56
- isCLI: false
57
- };
50
+ return {
51
+ operations: 0,
52
+ eventCount: 0,
53
+ userCount: 0,
54
+ isBatchMode: false,
55
+ verbose: false,
56
+ isCLI: false
57
+ };
58
58
  }
59
59
 
60
60
  /**
@@ -65,90 +65,91 @@ function createRuntimeState() {
65
65
  * @returns {Context} Context object containing all state and dependencies
66
66
  */
67
67
  export function createContext(config, storage = null, isCliMode = null) {
68
- // Import campaign data (could be made configurable)
69
- const campaignData = campaigns;
70
-
71
- // Create computed defaults based on config
72
- const defaults = createDefaults(config, campaignData);
73
-
74
- // Create runtime state
75
- const runtime = createRuntimeState();
76
-
77
- // Set runtime flags from config
78
- runtime.verbose = config.verbose || false;
79
- runtime.isBatchMode = config.batchSize && config.batchSize < config.numEvents;
80
- runtime.isCLI = isCliMode !== null ? isCliMode : (process.argv[1]?.endsWith('index.js') || process.argv[1]?.endsWith('cli.js') || false);
81
-
82
- const context = {
83
- config,
84
- storage,
85
- defaults,
86
- campaigns: campaignData,
87
- runtime,
88
-
89
- // Helper methods for updating state
90
- incrementOperations() {
91
- runtime.operations++;
92
- },
93
-
94
- incrementEvents() {
95
- runtime.eventCount++;
96
- },
97
-
98
- incrementUsers() {
99
- runtime.userCount++;
100
- },
101
-
102
- setStorage(storageObj) {
103
- this.storage = storageObj;
104
- },
105
-
106
- // Getter methods for runtime state
107
- getOperations() {
108
- return runtime.operations;
109
- },
110
-
111
- getEventCount() {
112
- return runtime.eventCount;
113
- },
114
-
115
- getUserCount() {
116
- return runtime.userCount;
117
- },
118
-
119
- incrementUserCount() {
120
- runtime.userCount++;
121
- },
122
-
123
- incrementEventCount() {
124
- runtime.eventCount++;
125
- },
126
-
127
- isBatchMode() {
128
- return runtime.isBatchMode;
129
- },
130
-
131
- isCLI() {
132
- return runtime.isCLI;
133
- },
134
-
135
- // Time helper methods
136
- getTimeShift() {
137
- const actualNow = dayjs().add(2, "day");
138
- return actualNow.diff(dayjs.unix(global.FIXED_NOW), "seconds");
139
- },
140
-
141
- getDaysShift() {
142
- const actualNow = dayjs().add(2, "day");
143
- return actualNow.diff(dayjs.unix(global.FIXED_NOW), "days");
144
- },
145
-
146
- // Time constants (previously globals)
147
- FIXED_NOW: global.FIXED_NOW,
148
- FIXED_BEGIN: global.FIXED_BEGIN
149
- };
150
-
151
- return context;
68
+ // Import campaign data (could be made configurable)
69
+ const campaignData = campaigns;
70
+
71
+ // Create computed defaults based on config
72
+ const defaults = createDefaults(config, campaignData);
73
+
74
+ // Create runtime state
75
+ const runtime = createRuntimeState();
76
+
77
+ // Set runtime flags from config
78
+ runtime.verbose = config.verbose || false;
79
+ runtime.isBatchMode = config.batchSize && config.batchSize < config.numEvents;
80
+ runtime.isCLI = isCliMode !== null ? isCliMode : (process.argv[1]?.endsWith('index.js') || process.argv[1]?.endsWith('entry.js') || false);
81
+ if (runtime.isCLI) runtime.verbose = true; // Always verbose in CLI mode
82
+
83
+ const context = {
84
+ config,
85
+ storage,
86
+ defaults,
87
+ campaigns: campaignData,
88
+ runtime,
89
+
90
+ // Helper methods for updating state
91
+ incrementOperations() {
92
+ runtime.operations++;
93
+ },
94
+
95
+ incrementEvents() {
96
+ runtime.eventCount++;
97
+ },
98
+
99
+ incrementUsers() {
100
+ runtime.userCount++;
101
+ },
102
+
103
+ setStorage(storageObj) {
104
+ this.storage = storageObj;
105
+ },
106
+
107
+ // Getter methods for runtime state
108
+ getOperations() {
109
+ return runtime.operations;
110
+ },
111
+
112
+ getEventCount() {
113
+ return runtime.eventCount;
114
+ },
115
+
116
+ getUserCount() {
117
+ return runtime.userCount;
118
+ },
119
+
120
+ incrementUserCount() {
121
+ runtime.userCount++;
122
+ },
123
+
124
+ incrementEventCount() {
125
+ runtime.eventCount++;
126
+ },
127
+
128
+ isBatchMode() {
129
+ return runtime.isBatchMode;
130
+ },
131
+
132
+ isCLI() {
133
+ return runtime.isCLI;
134
+ },
135
+
136
+ // Time helper methods
137
+ getTimeShift() {
138
+ const actualNow = dayjs().add(2, "day");
139
+ return actualNow.diff(dayjs.unix(global.FIXED_NOW), "seconds");
140
+ },
141
+
142
+ getDaysShift() {
143
+ const actualNow = dayjs().add(2, "day");
144
+ return actualNow.diff(dayjs.unix(global.FIXED_NOW), "days");
145
+ },
146
+
147
+ // Time constants (previously globals)
148
+ FIXED_NOW: global.FIXED_NOW,
149
+ FIXED_BEGIN: global.FIXED_BEGIN
150
+ };
151
+
152
+ return context;
152
153
  }
153
154
 
154
155
  /**
@@ -158,8 +159,8 @@ export function createContext(config, storage = null, isCliMode = null) {
158
159
  * @returns {Context} Updated context object
159
160
  */
160
161
  export function updateContextWithStorage(context, storage) {
161
- context.storage = storage;
162
- return context;
162
+ context.storage = storage;
163
+ return context;
163
164
  }
164
165
 
165
166
  /**
@@ -168,14 +169,14 @@ export function updateContextWithStorage(context, storage) {
168
169
  * @throws {Error} If context is missing required properties
169
170
  */
170
171
  export function validateContext(context) {
171
- const required = ['config', 'defaults', 'campaigns', 'runtime'];
172
- const missing = required.filter(prop => !context[prop]);
173
-
174
- if (missing.length > 0) {
175
- throw new Error(`Context is missing required properties: ${missing.join(', ')}`);
176
- }
177
-
178
- if (!context.config.numUsers || !context.config.numEvents) {
179
- throw new Error('Context config must have numUsers and numEvents');
180
- }
172
+ const required = ['config', 'defaults', 'campaigns', 'runtime'];
173
+ const missing = required.filter(prop => !context[prop]);
174
+
175
+ if (missing.length > 0) {
176
+ throw new Error(`Context is missing required properties: ${missing.join(', ')}`);
177
+ }
178
+
179
+ if (!context.config.numUsers || !context.config.numEvents) {
180
+ throw new Error('Context config must have numUsers and numEvents');
181
+ }
181
182
  }
@@ -75,28 +75,41 @@ export async function createHookArray(arr = [], opts = {}) {
75
75
  if (item === null || item === undefined) return false;
76
76
  if (typeof item === 'object' && Object.keys(item).length === 0) return false;
77
77
 
78
- const allMetaData = { ...rest, ...meta };
79
-
80
- if (Array.isArray(item)) {
81
- for (const i of item) {
78
+ // Performance optimization: skip hook overhead for passthrough hooks
79
+ const isPassthroughHook = hook.toString().includes('return record') || hook.length === 1;
80
+
81
+ if (isPassthroughHook) {
82
+ // Fast path for passthrough hooks - no transformation needed
83
+ if (Array.isArray(item)) {
84
+ arr.push(...item);
85
+ } else {
86
+ arr.push(item);
87
+ }
88
+ } else {
89
+ // Slow path for actual transformation hooks
90
+ const allMetaData = { ...rest, ...meta };
91
+
92
+ if (Array.isArray(item)) {
93
+ for (const i of item) {
94
+ try {
95
+ const enriched = await hook(i, type, allMetaData);
96
+ if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
97
+ else arr.push(enriched);
98
+ } catch (e) {
99
+ console.error(`\n\nyour hook had an error\n\n`, e);
100
+ arr.push(i);
101
+ }
102
+ }
103
+ } else {
82
104
  try {
83
- const enriched = await hook(i, type, allMetaData);
105
+ const enriched = await hook(item, type, allMetaData);
84
106
  if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
85
107
  else arr.push(enriched);
86
108
  } catch (e) {
87
109
  console.error(`\n\nyour hook had an error\n\n`, e);
88
- arr.push(i);
110
+ arr.push(item);
89
111
  }
90
112
  }
91
- } else {
92
- try {
93
- const enriched = await hook(item, type, allMetaData);
94
- if (Array.isArray(enriched)) enriched.forEach(e => arr.push(e));
95
- else arr.push(enriched);
96
- } catch (e) {
97
- console.error(`\n\nyour hook had an error\n\n`, e);
98
- arr.push(item);
99
- }
100
113
  }
101
114
 
102
115
  if (arr.length > BATCH_SIZE) {
@@ -6,7 +6,6 @@
6
6
  /** @typedef {import('../../types').Context} Context */
7
7
 
8
8
  import dayjs from "dayjs";
9
- import { md5 } from "ak-tools";
10
9
  import * as u from "../utils/utils.js";
11
10
 
12
11
  /**
@@ -74,7 +73,7 @@ function createAdSpendEvent(network, campaign, day, chance) {
74
73
 
75
74
  // Create unique identifiers
76
75
  const id = network.utm_source[0] + '-' + campaign;
77
- const uid = md5(id);
76
+ const uid = u.quickHash(id);
78
77
 
79
78
  return {
80
79
  event: "$ad_spend",
@@ -10,7 +10,6 @@
10
10
  /** @typedef {import('../../types').Context} Context */
11
11
 
12
12
  import dayjs from "dayjs";
13
- import { md5 } from "ak-tools";
14
13
  import * as u from "../utils/utils.js";
15
14
 
16
15
  /**
@@ -77,7 +76,7 @@ export async function makeEvent(
77
76
 
78
77
  // Add default properties based on configuration
79
78
  if (hasLocation) {
80
- defaultProps.location = u.shuffleArray(defaults.locationsEvents()).pop();
79
+ defaultProps.location = u.pickRandom(defaults.locationsEvents());
81
80
  }
82
81
 
83
82
  if (hasBrowser) {
@@ -91,13 +90,13 @@ export async function makeEvent(
91
90
 
92
91
  // Add campaigns with attribution likelihood
93
92
  if (hasCampaigns && chance.bool({ likelihood: 25 })) {
94
- defaultProps.campaigns = u.shuffleArray(defaults.campaigns()).pop();
93
+ defaultProps.campaigns = u.pickRandom(defaults.campaigns());
95
94
  }
96
95
 
97
96
  // Select device from pool
98
97
  const devices = devicePool.flat();
99
98
  if (devices.length) {
100
- defaultProps.device = u.shuffleArray(devices).pop();
99
+ defaultProps.device = u.pickRandom(devices);
101
100
  }
102
101
 
103
102
  // Set event time using TimeSoup for realistic distribution
@@ -128,8 +127,8 @@ export async function makeEvent(
128
127
  eventTemplate.user_id = distinct_id;
129
128
  }
130
129
 
131
- // Merge custom properties with super properties
132
- const props = { ...chosenEvent.properties, ...superProps };
130
+ // Merge custom properties with super properties
131
+ const props = Object.assign({}, chosenEvent.properties, superProps);
133
132
 
134
133
  // Add custom properties from event configuration
135
134
  for (const key in props) {
@@ -150,7 +149,9 @@ export async function makeEvent(
150
149
  addGroupProperties(eventTemplate, groupKeys);
151
150
 
152
151
  // Generate unique insert_id
153
- eventTemplate.insert_id = md5(JSON.stringify(eventTemplate));
152
+ const distinctId = eventTemplate.user_id || eventTemplate.device_id || eventTemplate.distinct_id || distinct_id;
153
+ const tuple = `${eventTemplate.event}-${eventTemplate.time}-${distinctId}`;
154
+ eventTemplate.insert_id = u.quickHash(tuple);
154
155
 
155
156
  // Apply time shift to move events to current timeline
156
157
  if (earliestTime) {
@@ -6,7 +6,6 @@
6
6
  /** @typedef {import('../../types').Context} Context */
7
7
 
8
8
  import dayjs from "dayjs";
9
- import { clone } from "ak-tools";
10
9
  import * as u from "../utils/utils.js";
11
10
  import { makeEvent } from "./events.js";
12
11
 
@@ -129,7 +128,7 @@ function buildFunnelEvents(context, sequence, chosenFunnelProps) {
129
128
 
130
129
  return sequence.map((eventName) => {
131
130
  const foundEvent = config.events?.find((e) => e.event === eventName);
132
- const eventSpec = clone(foundEvent) || { event: eventName, properties: {} };
131
+ const eventSpec = u.deepClone(foundEvent) || { event: eventName, properties: {} };
133
132
 
134
133
  // Process event properties
135
134
  for (const key in eventSpec.properties) {
@@ -6,7 +6,6 @@
6
6
  /** @typedef {import('../../types').Context} Context */
7
7
 
8
8
  import dayjs from "dayjs";
9
- import { clone } from "ak-tools";
10
9
  import * as u from "../utils/utils.js";
11
10
 
12
11
  /**
@@ -44,7 +43,7 @@ export async function makeMirror(context) {
44
43
  if (shouldProcessEvent(oldEvent.event, events)) {
45
44
  // Clone event only when needed
46
45
  if (!newEvent) {
47
- newEvent = clone(oldEvent);
46
+ newEvent = u.deepClone(oldEvent);
48
47
  }
49
48
 
50
49
  // Apply the specified strategy
@@ -7,7 +7,8 @@
7
7
 
8
8
  import dayjs from "dayjs";
9
9
  import path from "path";
10
- import { clone, comma, ls, rm } from "ak-tools";
10
+ import { comma, ls, rm } from "ak-tools";
11
+ import * as u from "../utils/utils.js";
11
12
  import mp from "mixpanel-import";
12
13
 
13
14
  /**
@@ -67,10 +68,11 @@ export async function sendToMixpanel(context) {
67
68
  // Import events
68
69
  if (eventData || isBATCH_MODE) {
69
70
  log(`importing events to mixpanel...\n`);
70
- let eventDataToImport = clone(eventData);
71
+ let eventDataToImport = u.deepClone(eventData);
71
72
  if (isBATCH_MODE) {
72
73
  const writeDir = eventData.getWriteDir();
73
74
  const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
75
+ // @ts-ignore
74
76
  eventDataToImport = files.filter(f => f.includes('-EVENTS-'));
75
77
  }
76
78
  const imported = await mp(creds, eventDataToImport, {
@@ -84,10 +86,11 @@ export async function sendToMixpanel(context) {
84
86
  // Import user profiles
85
87
  if (userProfilesData || isBATCH_MODE) {
86
88
  log(`importing user profiles to mixpanel...\n`);
87
- let userProfilesToImport = clone(userProfilesData);
89
+ let userProfilesToImport = u.deepClone(userProfilesData);
88
90
  if (isBATCH_MODE) {
89
91
  const writeDir = userProfilesData.getWriteDir();
90
92
  const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
93
+ // @ts-ignore
91
94
  userProfilesToImport = files.filter(f => f.includes('-USERS-'));
92
95
  }
93
96
  const imported = await mp(creds, userProfilesToImport, {
@@ -101,10 +104,11 @@ export async function sendToMixpanel(context) {
101
104
  // Import ad spend data
102
105
  if (groupEventData || isBATCH_MODE) {
103
106
  log(`importing ad spend data to mixpanel...\n`);
104
- let adSpendDataToImport = clone(adSpendData);
107
+ let adSpendDataToImport = u.deepClone(adSpendData);
105
108
  if (isBATCH_MODE) {
106
109
  const writeDir = adSpendData.getWriteDir();
107
110
  const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
111
+ // @ts-ignore
108
112
  adSpendDataToImport = files.filter(f => f.includes('-AD-SPEND-'));
109
113
  }
110
114
  const imported = await mp(creds, adSpendDataToImport, {
@@ -120,10 +124,11 @@ export async function sendToMixpanel(context) {
120
124
  for (const groupEntity of groupProfilesData) {
121
125
  const groupKey = groupEntity?.groupKey;
122
126
  log(`importing ${groupKey} profiles to mixpanel...\n`);
123
- let groupProfilesToImport = clone(groupEntity);
127
+ let groupProfilesToImport = u.deepClone(groupEntity);
124
128
  if (isBATCH_MODE) {
125
129
  const writeDir = groupEntity.getWriteDir();
126
130
  const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
131
+ // @ts-ignore
127
132
  groupProfilesToImport = files.filter(f => f.includes(`-GROUPS-${groupKey}`));
128
133
  }
129
134
  const imported = await mp({ token, groupKey }, groupProfilesToImport, {
@@ -138,10 +143,11 @@ export async function sendToMixpanel(context) {
138
143
  // Import group events
139
144
  if (groupEventData || isBATCH_MODE) {
140
145
  log(`importing group events to mixpanel...\n`);
141
- let groupEventDataToImport = clone(groupEventData);
146
+ let groupEventDataToImport = u.deepClone(groupEventData);
142
147
  if (isBATCH_MODE) {
143
148
  const writeDir = groupEventData.getWriteDir();
144
149
  const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
150
+ // @ts-ignore
145
151
  groupEventDataToImport = files.filter(f => f.includes('-GROUP-EVENTS-'));
146
152
  }
147
153
  const imported = await mp(creds, groupEventDataToImport, {
@@ -160,10 +166,11 @@ export async function sendToMixpanel(context) {
160
166
  for (const scdEntity of scdTableData) {
161
167
  const scdKey = scdEntity?.scdKey;
162
168
  log(`importing ${scdKey} SCD data to mixpanel...\n`);
163
- let scdDataToImport = clone(scdEntity);
169
+ let scdDataToImport = u.deepClone(scdEntity);
164
170
  if (isBATCH_MODE) {
165
171
  const writeDir = scdEntity.getWriteDir();
166
172
  const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
173
+ // @ts-ignore
167
174
  scdDataToImport = files.filter(f => f.includes(`-SCD-${scdKey}`));
168
175
  }
169
176
 
@@ -198,6 +205,7 @@ export async function sendToMixpanel(context) {
198
205
  if (!writeToDisk && isBATCH_MODE) {
199
206
  const writeDir = eventData?.getWriteDir() || userProfilesData?.getWriteDir();
200
207
  const listDir = await ls(writeDir.split(path.basename(writeDir)).join(""));
208
+ // @ts-ignore
201
209
  const files = listDir.filter(f =>
202
210
  f.includes('-EVENTS-') ||
203
211
  f.includes('-USERS-') ||
@@ -7,6 +7,7 @@
7
7
 
8
8
  import dayjs from "dayjs";
9
9
  import pLimit from 'p-limit';
10
+ import os from 'os';
10
11
  import * as u from "../utils/utils.js";
11
12
  import * as t from 'ak-tools';
12
13
  import { makeEvent } from "../generators/events.js";
@@ -22,7 +23,7 @@ import { makeSCD } from "../generators/scd.js";
22
23
  export async function userLoop(context) {
23
24
  const { config, storage, defaults } = context;
24
25
  const chance = u.getChance();
25
- const concurrency = config?.concurrency || 1;
26
+ const concurrency = config?.concurrency || Math.min(os.cpus().length * 2, 16);
26
27
  const USER_CONN = pLimit(concurrency);
27
28
 
28
29
  const {
@@ -45,8 +46,12 @@ export async function userLoop(context) {
45
46
  const avgEvPerUser = numEvents / numUsers;
46
47
  const startTime = Date.now();
47
48
 
49
+ // Create batches for parallel processing
50
+ const batchSize = Math.max(1, Math.ceil(numUsers / concurrency));
51
+ const userPromises = [];
52
+
48
53
  for (let i = 0; i < numUsers; i++) {
49
- await USER_CONN(async () => {
54
+ const userPromise = USER_CONN(async () => {
50
55
  context.incrementUserCount();
51
56
  const eps = Math.floor(context.getEventCount() / ((Date.now() - startTime) / 1000));
52
57
 
@@ -73,7 +78,7 @@ export async function userLoop(context) {
73
78
  : dayjs.unix(global.FIXED_BEGIN);
74
79
 
75
80
  if (hasLocation) {
76
- const location = u.shuffleArray(u.choose(defaults.locationsUsers)).pop();
81
+ const location = u.pickRandom(u.choose(defaults.locationsUsers));
77
82
  for (const key in location) {
78
83
  user[key] = location[key];
79
84
  }
@@ -83,6 +88,7 @@ export async function userLoop(context) {
83
88
  const profile = await makeUserProfile(context, userProps, user);
84
89
 
85
90
  // SCD creation
91
+ // @ts-ignore
86
92
  const scdUserTables = t.objFilter(scdProps, (scd) => scd.type === 'user' || !scd.type);
87
93
  const scdTableKeys = Object.keys(scdUserTables);
88
94
 
@@ -129,12 +135,12 @@ export async function userLoop(context) {
129
135
  const timeShift = context.getTimeShift();
130
136
  userFirstEventTime = dayjs(data[0].time).subtract(timeShift, 'seconds').unix();
131
137
  numEventsPreformed += data.length;
132
- usersEvents.push(...data);
138
+ usersEvents = usersEvents.concat(data);
133
139
 
134
140
  if (!userConverted) {
135
- if (verbose) {
136
- u.progress([["users", context.getUserCount()], ["events", context.getEventCount()]]);
137
- }
141
+ // if (verbose) {
142
+ // u.progress([["users", context.getUserCount()], ["events", context.getEventCount()]]);
143
+ // }
138
144
  return;
139
145
  }
140
146
  } else {
@@ -146,11 +152,11 @@ export async function userLoop(context) {
146
152
  const currentFunnel = chance.pickone(usageFunnels);
147
153
  const [data, userConverted] = await makeFunnel(context, currentFunnel, user, userFirstEventTime, profile, userSCD);
148
154
  numEventsPreformed += data.length;
149
- usersEvents.push(...data);
155
+ usersEvents = usersEvents.concat(data);
150
156
  } else {
151
157
  const data = await makeEvent(context, distinct_id, userFirstEventTime, u.pick(config.events), user.anonymousIds, user.sessionIds, {}, config.groupKeys, true);
152
158
  numEventsPreformed++;
153
- usersEvents.push(data);
159
+ usersEvents = usersEvents.concat(data);
154
160
  }
155
161
  }
156
162
 
@@ -187,8 +193,13 @@ export async function userLoop(context) {
187
193
  await eventData.hookPush(usersEvents, { profile });
188
194
 
189
195
  if (verbose) {
190
- u.progress([["users", context.getUserCount()], ["events", context.getEventCount()]]);
196
+ // u.progress([["users", context.getUserCount()], ["events", context.getEventCount()]]);
191
197
  }
192
198
  });
199
+
200
+ userPromises.push(userPromise);
193
201
  }
202
+
203
+ // Wait for all users to complete
204
+ await Promise.all(userPromises);
194
205
  }
@@ -1,7 +1,7 @@
1
1
  import fs from 'fs';
2
2
  import Chance from 'chance';
3
3
  import readline from 'readline';
4
- import { comma, uid, clone } from 'ak-tools';
4
+ import { comma, uid} from 'ak-tools';
5
5
  import { spawn } from 'child_process';
6
6
  import dayjs from 'dayjs';
7
7
  import utc from 'dayjs/plugin/utc.js';
@@ -187,9 +187,26 @@ function choose(value) {
187
187
  const chance = getChance();
188
188
 
189
189
  try {
190
- // Keep resolving the value if it's a function
190
+ // Keep resolving the value if it's a function (with caching)
191
191
  while (typeof value === 'function') {
192
- value = value();
192
+ const funcString = value.toString();
193
+
194
+ // Check cache for weighted array functions
195
+ if (typeof global.weightedArrayCache === 'undefined') {
196
+ global.weightedArrayCache = new Map();
197
+ }
198
+
199
+ if (global.weightedArrayCache.has(funcString)) {
200
+ value = global.weightedArrayCache.get(funcString);
201
+ break;
202
+ }
203
+
204
+ const result = value();
205
+ if (Array.isArray(result) && result.length > 10) {
206
+ // Cache large arrays (likely weighted arrays)
207
+ global.weightedArrayCache.set(funcString, result);
208
+ }
209
+ value = result;
193
210
  }
194
211
 
195
212
  if (Array.isArray(value) && value.length === 0) {
@@ -666,6 +683,21 @@ function pickAWinner(items, mostChosenIndex) {
666
683
  };
667
684
  }
668
685
 
686
+ function quickHash(str, seed = 0) {
687
+ let h1 = 0xdeadbeef ^ seed, h2 = 0x41c6ce57 ^ seed;
688
+ for (let i = 0, ch; i < str.length; i++) {
689
+ ch = str.charCodeAt(i);
690
+ h1 = Math.imul(h1 ^ ch, 2654435761);
691
+ h2 = Math.imul(h2 ^ ch, 1597334677);
692
+ }
693
+ h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507);
694
+ h1 ^= Math.imul(h2 ^ (h2 >>> 13), 3266489909);
695
+ h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507);
696
+ h2 ^= Math.imul(h1 ^ (h1 >>> 13), 3266489909);
697
+
698
+ return (4294967296 * (2097151 & h2) + (h1 >>> 0)).toString();
699
+ };
700
+
669
701
  /*
670
702
  ----
671
703
  SHUFFLERS
@@ -677,11 +709,20 @@ function shuffleArray(array) {
677
709
  const chance = getChance();
678
710
  for (let i = array.length - 1; i > 0; i--) {
679
711
  const j = chance.integer({ min: 0, max: i });
680
- [array[i], array[j]] = [array[j], array[i]];
712
+ const temp = array[i];
713
+ array[i] = array[j];
714
+ array[j] = temp;
681
715
  }
682
716
  return array;
683
717
  }
684
718
 
719
/**
 * Return one element of `array`, chosen by index using the shared chance instance.
 *
 * @param {Array} array - candidates to pick from
 * @returns {*} the element at a random index from 0 to length - 1 (inclusive bounds are
 *   passed to `chance.integer`), or `undefined` when `array` is falsy or empty
 */
function pickRandom(array) {
	const rng = getChance();

	const nothingToPick = !array || array.length === 0;
	if (nothingToPick) {
		return undefined;
	}

	const lastIndex = array.length - 1;
	const pickedIndex = rng.integer({ min: 0, max: lastIndex });
	return array[pickedIndex];
}
725
+
685
726
  function shuffleExceptFirst(array) {
686
727
  if (array.length <= 1) return array;
687
728
  const restShuffled = shuffleArray(array.slice(1));
@@ -877,16 +918,26 @@ function buildFileNames(config) {
877
918
  * @param {[string, number][]} arrayOfArrays
878
919
  */
879
920
function progress(arrayOfArrays) {
	// fall back to 120 columns when stdout does not report a width
	const width = process.stdout.columns || 120;

	// go back to column 0 and wipe whatever the previous call printed
	readline.cursorTo(process.stdout, 0);
	readline.clearLine(process.stdout, 0);

	// one "label: 1,234" segment per [label, count] pair
	const line = arrayOfArrays
		.map((pair) => {
			const [label, count] = pair;
			return `${label}: ${comma(count)}`;
		})
		.join(' │ ');

	// too long: cut and mark with an ellipsis; otherwise pad out to the full width
	let output;
	if (line.length > width) {
		output = line.substring(0, width - 3) + '...';
	} else {
		output = line.padEnd(width, ' ');
	}

	process.stdout.write(output);
}
890
941
 
891
942
  function openFinder(path, callback) {
892
943
  path = path || '/';
@@ -906,7 +957,6 @@ function getUniqueKeys(data) {
906
957
  };
907
958
 
908
959
 
909
-
910
960
  /*
911
961
  ----
912
962
  CORE
@@ -953,7 +1003,7 @@ function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0)
953
1003
  let totalRange = latestTime - earliestTime;
954
1004
  if (totalRange <= 0 || earliestTime > latestTime) {
955
1005
  //just flip earliest and latest
956
- let tempEarly = latestTime
1006
+ let tempEarly = latestTime;
957
1007
  let tempLate = earliestTime;
958
1008
  earliestTime = tempEarly;
959
1009
  latestTime = tempLate;
@@ -967,23 +1017,22 @@ function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0)
967
1017
  const chunkEnd = chunkStart + chunkSize;
968
1018
  const chunkMid = (chunkStart + chunkEnd) / 2;
969
1019
 
970
- // Generate a single timestamp within this chunk using a normal distribution centered at chunkMid
971
- let offset;
972
- let iterations = 0;
973
- let isValidTime = false;
974
- do {
975
- iterations++;
976
- soupHits++;
977
- offset = chance.normal({ mean: mean, dev: chunkSize / deviation });
978
- isValidTime = validTime(chunkMid + offset, earliestTime, latestTime);
979
- if (iterations > 25000) {
980
- if (process.env?.NODE_ENV === 'dev') debugger;
981
- throw `${iterations} iterations... exceeded`;
982
- }
983
- } while (chunkMid + offset < chunkStart || chunkMid + offset > chunkEnd);
1020
+ // Optimized timestamp generation - clamp to valid range instead of looping
1021
+ const maxDeviation = chunkSize / deviation;
1022
+ let offset = chance.normal({ mean: mean, dev: maxDeviation });
1023
+
1024
+ // Clamp to chunk boundaries to prevent infinite loops
1025
+ const proposedTime = chunkMid + offset;
1026
+ const clampedTime = Math.max(chunkStart, Math.min(chunkEnd, proposedTime));
1027
+
1028
+ // Ensure it's within the overall valid range
1029
+ const finalTime = Math.max(earliestTime, Math.min(latestTime, clampedTime));
1030
+
1031
+ // Update soup hits counter (keep for compatibility)
1032
+ soupHits++;
984
1033
 
985
1034
  try {
986
- return dayjs.unix(chunkMid + offset).toISOString();
1035
+ return dayjs.unix(finalTime).toISOString();
987
1036
  }
988
1037
 
989
1038
  catch (e) {
@@ -994,8 +1043,6 @@ function TimeSoup(earliestTime, latestTime, peaks = 5, deviation = 2, mean = 0)
994
1043
  }
995
1044
 
996
1045
 
997
-
998
-
999
1046
  /**
1000
1047
  * @param {string} userId
1001
1048
  * @param {number} bornDaysAgo=30
@@ -1151,6 +1198,61 @@ function generateEmoji(max = 10, array = false) {
1151
1198
  };
1152
1199
  };
1153
1200
 
1201
/**
 * Recursively copy a value.
 *
 * - `null`, `undefined` and non-symbol primitives are returned unchanged
 * - a symbol becomes a NEW symbol with the same description (so it is not `===` to the original)
 * - functions are returned by reference unless `opts.newFns` is truthy (see the helper for caveats)
 * - arrays, plain objects, `Map`s and `Set`s are copied element by element, recursively
 * - `Date` and `RegExp` get fresh instances with the same time / source and flags
 * - anything else is rebuilt with `new thing.constructor(thing)`; if that throws, own enumerable
 *   keys are copied onto a new object that shares the original's prototype
 *
 * Circular references are supported: a reference back to an object that is still being
 * copied resolves to that object's copy instead of recursing forever.
 *
 * @param {*} thing - value to copy
 * @param {{ newFns?: boolean }} [opts] - `newFns: true` re-creates functions from their source text
 * @returns {*} the copy
 */
function deepClone(thing, opts) {
	// the cycle tracker lives in a helper so callbacks like `list.map(deepClone)`,
	// which pass extra positional arguments, cannot clobber it
	return deepCloneInternal(thing, opts, new WeakMap());
}

/**
 * Worker behind `deepClone`.
 *
 * @param {*} thing - value to copy
 * @param {{ newFns?: boolean }} [opts] - forwarded unchanged to every nested call
 * @param {WeakMap<object, object>} inProgress - containers currently being copied, mapped to
 *   their (partially filled) copies; entries are removed once a container is finished, so
 *   only true cycles are short-circuited and repeated non-circular references still get
 *   independent copies
 * @returns {*} the copy
 */
function deepCloneInternal(thing, opts, inProgress) {
	if (thing === null || thing === undefined) return thing;

	const type = typeof thing;
	if (type === 'symbol') return Symbol(thing.description);
	if (type === 'function') {
		// NOTE(review): `newFns` evaluates the function's source text with `new Function`;
		// the result does not keep the original's closure variables and sources that are
		// not valid expressions (e.g. method shorthand) will throw. Only use on trusted code.
		return opts && opts.newFns ?
			new Function('return ' + thing.toString())() :
			thing;
	}
	if (type !== 'object') return thing;

	// a reference back to an ancestor that is still being copied: reuse its copy
	if (inProgress.has(thing)) return inProgress.get(thing);

	if (Array.isArray(thing)) {
		const copy = new Array(thing.length);
		inProgress.set(thing, copy);
		for (let i = 0; i < thing.length; i++) {
			copy[i] = deepCloneInternal(thing[i], opts, inProgress);
		}
		inProgress.delete(thing);
		return copy;
	}

	if (thing instanceof Date) return new Date(thing.getTime());
	if (thing instanceof RegExp) return new RegExp(thing.source, thing.flags);

	// Map / Set: copy the contents recursively instead of sharing them with the original
	if (thing.constructor === Map) {
		const copy = new Map();
		inProgress.set(thing, copy);
		for (const [key, value] of thing) {
			copy.set(
				deepCloneInternal(key, opts, inProgress),
				deepCloneInternal(value, opts, inProgress)
			);
		}
		inProgress.delete(thing);
		return copy;
	}
	if (thing.constructor === Set) {
		const copy = new Set();
		inProgress.set(thing, copy);
		for (const member of thing) {
			copy.add(deepCloneInternal(member, opts, inProgress));
		}
		inProgress.delete(thing);
		return copy;
	}

	if (thing.constructor === Object) {
		const copy = {};
		inProgress.set(thing, copy);
		for (const key of Object.keys(thing)) {
			copy[key] = deepCloneInternal(thing[key], opts, inProgress);
		}
		inProgress.delete(thing);
		return copy;
	}

	// Everything else (typed arrays, class instances, ...).
	// NOTE(review): the constructor receives the original as its only argument; a class whose
	// constructor expects different arguments may return a wrong copy without throwing.
	try {
		return new thing.constructor(thing);
	} catch {
		// not constructible that way (e.g. no usable constructor): keep the prototype and
		// copy own enumerable keys
		const copy = Object.create(Object.getPrototypeOf(thing));
		inProgress.set(thing, copy);
		for (const key of Object.keys(thing)) {
			copy[key] = deepCloneInternal(thing[key], opts, inProgress);
		}
		inProgress.delete(thing);
		return copy;
	}
}
1154
1256
 
1155
1257
 
1156
1258
  export {
@@ -1159,13 +1261,14 @@ export {
1159
1261
  dates,
1160
1262
  day,
1161
1263
  choose,
1264
+ pickRandom,
1162
1265
  exhaust,
1163
1266
  integer,
1164
1267
  TimeSoup,
1165
1268
  companyName,
1166
1269
  generateEmoji,
1167
1270
  hasSameKeys as haveSameKeys,
1168
-
1271
+ deepClone,
1169
1272
  initChance,
1170
1273
  getChance,
1171
1274
 
@@ -1182,6 +1285,7 @@ export {
1182
1285
  getUniqueKeys,
1183
1286
  person,
1184
1287
  pickAWinner,
1288
+ quickHash,
1185
1289
  weighArray,
1186
1290
  weighFunnels,
1187
1291
  validateEventConfig,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "make-mp-data",
3
- "version": "2.0.17",
3
+ "version": "2.0.19",
4
4
  "description": "builds all mixpanel primitives for a given project",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -22,13 +22,13 @@
22
22
  "post": "npm publish",
23
23
  "deps": "./scripts/update-deps.sh",
24
24
  "test": "NODE_ENV=test vitest run",
25
- "test:watch": "NODE_ENV=test vitest",
26
- "test:ui": "NODE_ENV=test vitest --ui",
27
25
  "coverage": "vitest run --coverage && open ./coverage/index.html",
28
26
  "typecheck": "tsc --noEmit",
29
27
  "new:dungeon": "./scripts/new-dungeon.sh",
30
28
  "new:project": "node ./scripts/new-project.mjs",
31
- "exp:benchmark": "node --no-warnings --experimental-vm-modules ./tests/benchmark/concurrency.mjs",
29
+ "exp:benchmark": "node ./tests/benchmark/concurrency.mjs",
30
+ "benchmark:phase1": "node ./tests/benchmark/phase1-performance.mjs",
31
+ "test:quick": "node ./tests/benchmark/quick-test.mjs",
32
32
  "exp:soup": "node ./tests/testSoup.mjs",
33
33
  "func:local": "functions-framework --target=entry",
34
34
  "func:deploy": "./scripts/deploy.sh"
@@ -38,7 +38,7 @@
38
38
  "url": "git+https://github.com/ak--47/make-mp-data.git"
39
39
  },
40
40
  "bin": {
41
- "make-mp-data": "./cli.js"
41
+ "make-mp-data": "./entry.js"
42
42
  },
43
43
  "keywords": [
44
44
  "mixpanel",