make-mp-data 2.1.11 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +31 -0
  2. package/dungeons/adspend.js +35 -1
  3. package/dungeons/anon.js +25 -1
  4. package/dungeons/array-of-object-lookup.js +201 -0
  5. package/dungeons/benchmark-heavy.js +241 -0
  6. package/dungeons/benchmark-light.js +141 -0
  7. package/dungeons/big.js +10 -9
  8. package/dungeons/business.js +60 -12
  9. package/dungeons/complex.js +35 -1
  10. package/dungeons/copilot.js +383 -0
  11. package/dungeons/education.js +1005 -0
  12. package/dungeons/experiments.js +18 -4
  13. package/dungeons/fintech.js +976 -0
  14. package/dungeons/foobar.js +32 -0
  15. package/dungeons/food.js +988 -0
  16. package/dungeons/funnels.js +38 -1
  17. package/dungeons/gaming.js +26 -5
  18. package/dungeons/media.js +861 -270
  19. package/dungeons/mil.js +31 -3
  20. package/dungeons/mirror.js +33 -1
  21. package/dungeons/retention-cadence.js +211 -0
  22. package/dungeons/rpg.js +1178 -0
  23. package/dungeons/sanity.js +32 -2
  24. package/dungeons/sass.js +923 -0
  25. package/dungeons/scd.js +47 -1
  26. package/dungeons/simple.js +29 -14
  27. package/dungeons/social.js +928 -0
  28. package/dungeons/streaming.js +373 -0
  29. package/dungeons/strict-event-test.js +30 -0
  30. package/dungeons/student-teacher.js +19 -5
  31. package/dungeons/text-generation.js +120 -84
  32. package/dungeons/too-big-events.js +203 -0
  33. package/dungeons/{userAgent.js → user-agent.js} +23 -2
  34. package/entry.js +5 -4
  35. package/index.js +41 -54
  36. package/lib/core/config-validator.js +122 -7
  37. package/lib/core/context.js +7 -14
  38. package/lib/core/storage.js +57 -25
  39. package/lib/generators/adspend.js +12 -12
  40. package/lib/generators/events.js +6 -5
  41. package/lib/generators/funnels.js +32 -10
  42. package/lib/generators/product-lookup.js +262 -0
  43. package/lib/generators/product-names.js +195 -0
  44. package/lib/generators/profiles.js +3 -3
  45. package/lib/generators/scd.js +13 -3
  46. package/lib/generators/text.js +17 -4
  47. package/lib/orchestrators/mixpanel-sender.js +244 -204
  48. package/lib/orchestrators/user-loop.js +54 -16
  49. package/lib/templates/phrases.js +473 -16
  50. package/lib/templates/schema.d.ts +173 -0
  51. package/lib/templates/verbose-schema.js +140 -206
  52. package/lib/utils/chart.js +210 -0
  53. package/lib/utils/function-registry.js +285 -0
  54. package/lib/utils/json-evaluator.js +172 -0
  55. package/lib/utils/logger.js +34 -0
  56. package/lib/utils/utils.js +41 -4
  57. package/package.json +12 -21
  58. package/types.d.ts +15 -5
  59. package/dungeons/ai-chat-analytics-ed.js +0 -274
  60. package/dungeons/money2020-ed-also.js +0 -277
  61. package/dungeons/money2020-ed.js +0 -579
  62. package/lib/generators/text-bak-old.js +0 -1121
  63. package/lib/orchestrators/worker-manager.js +0 -203
  64. package/lib/templates/hooks-instructions.txt +0 -434
  65. package/lib/templates/phrases-bak.js +0 -925
  66. package/lib/templates/prompt (old).txt +0 -98
  67. package/lib/templates/schema-instructions.txt +0 -155
  68. package/lib/templates/scratch-dungeon-template.js +0 -116
  69. package/lib/templates/textQuickTest.js +0 -172
  70. package/lib/utils/ai.js +0 -120
  71. package/lib/utils/project.js +0 -166
@@ -17,211 +17,251 @@ import mp from "mixpanel-import";
17
17
  * @returns {Promise<Object>} Import results for all data types
18
18
  */
19
19
  export async function sendToMixpanel(context) {
20
- const { config, storage } = context;
21
- const {
22
- adSpendData,
23
- eventData,
24
- groupProfilesData,
25
- lookupTableData,
26
- mirrorEventData,
27
- scdTableData,
28
- userProfilesData,
29
- groupEventData
30
- } = storage;
31
-
32
- const {
33
- token,
34
- region,
35
- writeToDisk = true,
36
- format,
37
- serviceAccount,
38
- projectId,
39
- serviceSecret
40
- } = config;
41
-
42
- const importResults = { events: {}, users: {}, groups: [] };
43
- const isBATCH_MODE = context.isBatchMode();
44
- const isCLI = context.isCLI();
45
- const NODE_ENV = process.env.NODE_ENV || "unknown";
46
-
47
- /** @type {import('mixpanel-import').Creds} */
48
- const creds = { token };
49
- const mpImportFormat = format === "json" ? "jsonl" : "csv";
50
-
51
- /** @type {import('mixpanel-import').Options} */
52
- const commonOpts = {
53
- region,
54
- fixData: true,
55
- verbose: false,
56
- forceStream: true,
57
- strict: true,
58
- epochEnd: dayjs().unix(),
59
- dryRun: false,
60
- abridged: false,
61
- fixJson: false,
62
- showProgress: NODE_ENV === "dev" ? true : false,
63
- streamFormat: mpImportFormat,
20
+ const { config, storage } = context;
21
+ const {
22
+ adSpendData,
23
+ eventData,
24
+ groupProfilesData,
25
+ lookupTableData,
26
+ mirrorEventData,
27
+ scdTableData,
28
+ userProfilesData,
29
+ groupEventData
30
+ } = storage;
31
+
32
+ const {
33
+ token,
34
+ region,
35
+ writeToDisk = true,
36
+ format,
37
+ serviceAccount,
38
+ projectId,
39
+ serviceSecret
40
+ } = config;
41
+
42
+ const importResults = { events: {}, users: {}, groups: [] };
43
+ const isBATCH_MODE = context.isBatchMode();
44
+ const NODE_ENV = process.env.NODE_ENV || "unknown";
45
+
46
+ /** @type {import('mixpanel-import').Creds} */
47
+ const creds = { token };
48
+ const mpImportFormat = format === "json" ? "jsonl" : "csv";
49
+
50
+ const isDev = NODE_ENV !== 'production';
51
+
52
+ /** @type {import('mixpanel-import').Options} */
53
+ const commonOpts = {
54
+ region,
55
+ fixData: true,
56
+ verbose: isDev,
57
+ forceStream: true,
58
+ strict: true,
59
+ epochEnd: dayjs().unix(),
60
+ dryRun: false,
61
+ abridged: false,
62
+ fixJson: false,
63
+ showProgress: isDev,
64
+ streamFormat: mpImportFormat,
64
65
  workers: 35
65
- };
66
-
67
- if (isCLI) commonOpts.showProgress = true;
68
-
69
- // Import events
70
- if (eventData || isBATCH_MODE) {
71
- log(`importing events to mixpanel...\n`);
72
- let eventDataToImport = u.deepClone(eventData);
73
- if (isBATCH_MODE) {
74
- const writeDir = eventData.getWriteDir();
75
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
76
- // @ts-ignore
77
- eventDataToImport = files.filter(f => f.includes('-EVENTS-'));
78
- }
79
- const imported = await mp(creds, eventDataToImport, {
80
- recordType: "event",
81
- ...commonOpts,
82
- });
83
- log(`\tsent ${comma(imported.success)} events\n`);
84
- importResults.events = imported;
85
- }
86
-
87
- // Import user profiles
88
- if (userProfilesData || isBATCH_MODE) {
89
- log(`importing user profiles to mixpanel...\n`);
90
- let userProfilesToImport = u.deepClone(userProfilesData);
91
- if (isBATCH_MODE) {
92
- const writeDir = userProfilesData.getWriteDir();
93
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
94
- // @ts-ignore
95
- userProfilesToImport = files.filter(f => f.includes('-USERS-'));
96
- }
97
- const imported = await mp(creds, userProfilesToImport, {
98
- recordType: "user",
99
- ...commonOpts,
100
- });
101
- log(`\tsent ${comma(imported.success)} user profiles\n`);
102
- importResults.users = imported;
103
- }
104
-
105
- // Import ad spend data
106
- if (adSpendData || isBATCH_MODE) {
107
- log(`importing ad spend data to mixpanel...\n`);
108
- let adSpendDataToImport = u.deepClone(adSpendData);
109
- if (isBATCH_MODE) {
110
- const writeDir = adSpendData.getWriteDir();
111
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
112
- // @ts-ignore
113
- adSpendDataToImport = files.filter(f => f.includes('-AD-SPEND-'));
114
- }
115
- const imported = await mp(creds, adSpendDataToImport, {
116
- recordType: "event",
117
- ...commonOpts,
118
- });
119
- log(`\tsent ${comma(imported.success)} ad spend events\n`);
120
- importResults.adSpend = imported;
121
- }
122
-
123
- // Import group profiles
124
- if (groupProfilesData || isBATCH_MODE) {
125
- for (const groupEntity of groupProfilesData) {
126
- const groupKey = groupEntity?.groupKey;
127
- log(`importing ${groupKey} profiles to mixpanel...\n`);
128
- let groupProfilesToImport = u.deepClone(groupEntity);
129
- if (isBATCH_MODE) {
130
- const writeDir = groupEntity.getWriteDir();
131
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
132
- // @ts-ignore
133
- groupProfilesToImport = files.filter(f => f.includes(`-GROUPS-${groupKey}`));
134
- }
135
- const imported = await mp({ token, groupKey }, groupProfilesToImport, {
136
- recordType: "group",
137
- ...commonOpts,
66
+ };
67
+
68
+ // Import events
69
+ if (eventData?.length > 0 || isBATCH_MODE) {
70
+ log(`importing events to mixpanel...\n`);
71
+ let eventDataToImport = u.deepClone(eventData);
72
+ // Check if we need to read from disk files instead of memory
73
+ const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && eventData && eventData.length === 0);
74
+ if (shouldReadFromFiles && eventData?.getWriteDir) {
75
+ const writeDir = eventData.getWriteDir();
76
+ const files = await ls(writeDir);
77
+ // @ts-ignore
78
+ eventDataToImport = files.filter(f => f.includes('-EVENTS'));
79
+ }
80
+ const imported = await mp(creds, eventDataToImport, {
81
+ recordType: "event",
82
+ ...commonOpts,
83
+ });
84
+ log(`\tsent ${comma(imported.success)} events\n`);
85
+ importResults.events = imported;
86
+ }
87
+
88
+ // Import user profiles
89
+ if (userProfilesData?.length > 0 || isBATCH_MODE) {
90
+ log(`importing user profiles to mixpanel...\n`);
91
+ let userProfilesToImport = u.deepClone(userProfilesData);
92
+ // Check if we need to read from disk files instead of memory
93
+ const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && userProfilesData && userProfilesData.length === 0);
94
+ if (shouldReadFromFiles && userProfilesData?.getWriteDir) {
95
+ const writeDir = userProfilesData.getWriteDir();
96
+ const files = await ls(writeDir);
97
+ // @ts-ignore
98
+ userProfilesToImport = files.filter(f => f.includes('-USERS'));
99
+ }
100
+ const imported = await mp(creds, userProfilesToImport, {
101
+ recordType: "user",
102
+ ...commonOpts,
103
+ });
104
+ log(`\tsent ${comma(imported.success)} user profiles\n`);
105
+ importResults.users = imported;
106
+ }
107
+
108
+ // Import ad spend data
109
+ if (adSpendData?.length > 0 || isBATCH_MODE) {
110
+ log(`importing ad spend data to mixpanel...\n`);
111
+ let adSpendDataToImport = u.deepClone(adSpendData);
112
+ // Check if we need to read from disk files instead of memory
113
+ const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && adSpendData && adSpendData.length === 0);
114
+ if (shouldReadFromFiles && adSpendData?.getWriteDir) {
115
+ const writeDir = adSpendData.getWriteDir();
116
+ const files = await ls(writeDir);
117
+ // @ts-ignore
118
+ adSpendDataToImport = files.filter(f => f.includes('-ADSPEND'));
119
+ }
120
+ const imported = await mp(creds, adSpendDataToImport, {
121
+ recordType: "event",
122
+ ...commonOpts,
123
+ });
124
+ log(`\tsent ${comma(imported.success)} ad spend events\n`);
125
+ importResults.adSpend = imported;
126
+ }
127
+
128
+ // Import group profiles
129
+ if (groupProfilesData && Array.isArray(groupProfilesData) && groupProfilesData.length > 0) {
130
+ for (const groupEntity of groupProfilesData) {
131
+ if (!groupEntity || groupEntity.length === 0) continue;
132
+ const groupKey = groupEntity?.groupKey;
133
+ log(`importing ${groupKey} profiles to mixpanel...\n`);
134
+ let groupProfilesToImport = u.deepClone(groupEntity);
135
+ // Check if we need to read from disk files instead of memory
136
+ const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && groupEntity.length === 0);
137
+ if (shouldReadFromFiles && groupEntity?.getWriteDir) {
138
+ const writeDir = groupEntity.getWriteDir();
139
+ const files = await ls(writeDir);
140
+ // @ts-ignore
141
+ groupProfilesToImport = files.filter(f => f.includes(`-${groupKey}-GROUPS`));
142
+ }
143
+ const imported = await mp({ token, groupKey }, groupProfilesToImport, {
144
+ recordType: "group",
145
+ ...commonOpts,
138
146
  groupKey,
139
- //dryRun: true
140
- });
141
- log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);
142
- importResults.groups.push(imported);
143
- }
144
- }
145
-
146
- // Import group events
147
- if (groupEventData || isBATCH_MODE) {
148
- log(`importing group events to mixpanel...\n`);
149
- let groupEventDataToImport = u.deepClone(groupEventData);
150
- if (isBATCH_MODE) {
151
- const writeDir = groupEventData.getWriteDir();
152
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
153
- // @ts-ignore
154
- groupEventDataToImport = files.filter(f => f.includes('-GROUP-EVENTS-'));
155
- }
156
- const imported = await mp(creds, groupEventDataToImport, {
157
- recordType: "event",
158
- ...commonOpts,
159
- strict: false
160
- });
161
- log(`\tsent ${comma(imported.success)} group events\n`);
162
- importResults.groupEvents = imported;
163
- }
164
-
165
- // Import SCD data (requires service account)
166
- if (serviceAccount && projectId && serviceSecret) {
167
- if (scdTableData || isBATCH_MODE) {
168
- log(`importing SCD data to mixpanel...\n`);
169
- for (const scdEntity of scdTableData) {
170
- const scdKey = scdEntity?.scdKey;
171
- log(`importing ${scdKey} SCD data to mixpanel...\n`);
172
- let scdDataToImport = u.deepClone(scdEntity);
173
- if (isBATCH_MODE) {
174
- const writeDir = scdEntity.getWriteDir();
175
- const files = await ls(writeDir.split(path.basename(writeDir)).join(""));
176
- // @ts-ignore
177
- scdDataToImport = files.filter(f => f.includes(`-SCD-${scdKey}`));
178
- }
179
-
180
- /** @type {import('mixpanel-import').Options} */
181
- const options = {
182
- recordType: "scd",
183
- scdKey,
184
- scdType: scdEntity.dataType,
185
- scdLabel: `${scdKey}-scd`,
186
- ...commonOpts,
187
- };
188
-
189
- if (scdEntity.entityType !== "user") options.groupKey = scdEntity.entityType;
190
-
191
- const imported = await mp(
192
- {
193
- token,
194
- acct: serviceAccount,
195
- pass: serviceSecret,
196
- project: projectId
197
- },
198
- scdDataToImport,
199
- options
200
- );
201
- log(`\tsent ${comma(imported.success)} ${scdKey} SCD data\n`);
202
- importResults[`${scdKey}_scd`] = imported;
203
- }
204
- }
205
- }
206
-
207
- // Clean up batch files if needed
208
- if (!writeToDisk && isBATCH_MODE) {
209
- const writeDir = eventData?.getWriteDir() || userProfilesData?.getWriteDir();
210
- const listDir = await ls(writeDir.split(path.basename(writeDir)).join(""));
211
- // @ts-ignore
212
- const files = listDir.filter(f =>
213
- f.includes('-EVENTS-') ||
214
- f.includes('-USERS-') ||
215
- f.includes('-AD-SPEND-') ||
216
- f.includes('-GROUPS-') ||
217
- f.includes('-GROUP-EVENTS-')
218
- );
219
- for (const file of files) {
220
- await rm(file);
221
- }
222
- }
223
-
224
- return importResults;
147
+ });
148
+ log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);
149
+ importResults.groups.push(imported);
150
+ }
151
+ }
152
+
153
+ // Import group events
154
+ if (groupEventData?.length > 0) {
155
+ log(`importing group events to mixpanel...\n`);
156
+ let groupEventDataToImport = u.deepClone(groupEventData);
157
+ // Check if we need to read from disk files instead of memory
158
+ const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && groupEventData.length === 0);
159
+ if (shouldReadFromFiles && groupEventData?.getWriteDir) {
160
+ const writeDir = groupEventData.getWriteDir();
161
+ const files = await ls(writeDir);
162
+ // @ts-ignore
163
+ groupEventDataToImport = files.filter(f => f.includes('-GROUP-EVENTS'));
164
+ }
165
+ const imported = await mp(creds, groupEventDataToImport, {
166
+ recordType: "event",
167
+ ...commonOpts,
168
+ strict: false
169
+ });
170
+ log(`\tsent ${comma(imported.success)} group events\n`);
171
+ importResults.groupEvents = imported;
172
+ }
173
+
174
+ // Import SCD data (requires service account)
175
+ if (serviceAccount && projectId && serviceSecret) {
176
+ if (scdTableData && Array.isArray(scdTableData) && scdTableData.length > 0) {
177
+ log(`importing SCD data to mixpanel...\n`);
178
+ for (const scdEntity of scdTableData) {
179
+ const scdKey = scdEntity?.scdKey;
180
+ const entityType = scdEntity?.entityType || 'user';
181
+ log(`importing ${scdKey} SCD data to mixpanel...\n`);
182
+ let scdDataToImport = u.deepClone(scdEntity);
183
+ // Check if we need to read from disk files instead of memory
184
+ const shouldReadFromFiles = isBATCH_MODE || (writeToDisk && scdEntity && scdEntity.length === 0);
185
+ if (shouldReadFromFiles && scdEntity?.getWriteDir) {
186
+ const writeDir = scdEntity.getWriteDir();
187
+ const files = await ls(writeDir);
188
+ // @ts-ignore
189
+ scdDataToImport = files.filter(f => f.includes(`-${scdKey}-SCD`))?.pop();
190
+
191
+ }
192
+
193
+ // Derive the data type from the actual SCD data
194
+ // todo: we can do better type inference here we don't need to visit the file
195
+ /** @type {"string" | "number" | "boolean"} */
196
+ let scdType = 'string'; // default to string
197
+ const scdExamplesValues = context.config.scdProps[Object.keys(context.config.scdProps).find(k => k === scdKey)].values;
198
+ if (scdExamplesValues) {
199
+ if (typeof scdExamplesValues[0] === 'number') {
200
+ scdType = 'number';
201
+ } else if (typeof scdExamplesValues[0] === 'boolean') {
202
+ scdType = 'boolean';
203
+ }
204
+ }
205
+
206
+
207
+
208
+ /** @type {import('mixpanel-import').Options} */
209
+ const options = {
210
+ recordType: "scd",
211
+ scdKey,
212
+ scdType,
213
+ scdLabel: `${scdKey}`,
214
+ fixData: true,
215
+ ...commonOpts,
216
+ };
217
+
218
+ // For group SCDs, add the groupKey
219
+ if (entityType !== "user") {
220
+ options.groupKey = entityType;
221
+ }
222
+
223
+ // SCD data is sketch and it shouldn't fail the whole import
224
+ try {
225
+ const imported = await mp(
226
+ {
227
+ token,
228
+ acct: serviceAccount,
229
+ pass: serviceSecret,
230
+ project: projectId
231
+ },
232
+ scdDataToImport,
233
+ options
234
+ );
235
+ log(`\tsent ${comma(imported.success)} ${scdKey} SCD data\n`);
236
+ importResults[`${scdKey}_scd`] = imported;
237
+ } catch (err) {
238
+ log(`\tfailed to import ${scdKey} SCD data: ${err.message}\n`);
239
+ importResults[`${scdKey}_scd`] = { success: 0, failed: 0, error: err.message };
240
+ }
241
+ }
242
+ }
243
+ }
244
+
245
+ // Clean up batch files if needed
246
+ if (!writeToDisk && isBATCH_MODE) {
247
+ const writeDir = eventData?.getWriteDir?.() || userProfilesData?.getWriteDir?.();
248
+ if (writeDir) {
249
+ const listDir = await ls(writeDir);
250
+ // @ts-ignore
251
+ const files = listDir.filter(f =>
252
+ f.includes('-EVENTS') ||
253
+ f.includes('-USERS') ||
254
+ f.includes('-ADSPEND') ||
255
+ f.includes('-GROUPS') ||
256
+ f.includes('-GROUP-EVENTS')
257
+ );
258
+ for (const file of files) {
259
+ await rm(file);
260
+ }
261
+ }
262
+ }
263
+
264
+ return importResults;
225
265
  }
226
266
 
227
267
  /**
@@ -229,5 +269,5 @@ export async function sendToMixpanel(context) {
229
269
  * @param {string} message - Message to log
230
270
  */
231
271
  function log(message) {
232
- console.log(message);
272
+ console.log(message);
233
273
  }
@@ -39,7 +39,9 @@ export async function userLoop(context) {
39
39
  userProps,
40
40
  scdProps,
41
41
  numDays,
42
- percentUsersBornInDataset = 5,
42
+ percentUsersBornInDataset = 15,
43
+ strictEventCount = false,
44
+ bornRecentBias = 0.3, // 0 = uniform distribution, 1 = heavily biased toward recent births
43
45
  } = config;
44
46
 
45
47
  const { eventData, userProfilesData, scdTableData } = storage;
@@ -50,16 +52,32 @@ export async function userLoop(context) {
50
52
  const batchSize = Math.max(1, Math.ceil(numUsers / concurrency));
51
53
  const userPromises = [];
52
54
 
55
+ // Track if we've already logged the strict event count message
56
+ let hasLoggedStrictCountReached = false;
57
+
53
58
  for (let i = 0; i < numUsers; i++) {
54
59
  const userPromise = USER_CONN(async () => {
60
+ // Bail out early if strictEventCount is enabled and we've hit numEvents
61
+ if (strictEventCount && context.getEventCount() >= numEvents) {
62
+ if (verbose && !hasLoggedStrictCountReached) {
63
+ console.log(`\n\u2713 Reached target of ${numEvents.toLocaleString()} events with strict event count enabled. Stopping user generation.`);
64
+ hasLoggedStrictCountReached = true;
65
+ }
66
+ return;
67
+ }
68
+
55
69
  context.incrementUserCount();
56
70
  const eps = Math.floor(context.getEventCount() / ((Date.now() - startTime) / 1000));
71
+ const memUsed = u.bytesHuman(process.memoryUsage().heapUsed);
72
+ const duration = u.formatDuration(Date.now() - startTime);
57
73
 
58
74
  if (verbose) {
59
75
  u.progress([
60
76
  ["users", context.getUserCount()],
61
77
  ["events", context.getEventCount()],
62
- ["eps", eps]
78
+ ["eps", eps],
79
+ ["mem", memUsed],
80
+ ["time", duration]
63
81
  ]);
64
82
  }
65
83
 
@@ -73,9 +91,36 @@ export async function userLoop(context) {
73
91
 
74
92
  // Calculate time adjustments
75
93
  const daysShift = context.getDaysShift();
76
- const adjustedCreated = userIsBornInDataset
77
- ? dayjs(created).subtract(daysShift, 'd')
78
- : dayjs.unix(global.FIXED_BEGIN);
94
+
95
+ // Apply recency bias to birth dates for users born in dataset
96
+ // bornRecentBias: 0 = uniform distribution, 1 = heavily biased toward recent
97
+ let adjustedCreated;
98
+ if (userIsBornInDataset) {
99
+ let biasedCreated = dayjs(created).subtract(daysShift, 'd');
100
+
101
+ if (bornRecentBias > 0) {
102
+ // Calculate how far into the dataset this user was born (0 = start, 1 = end/recent)
103
+ const datasetStart = dayjs.unix(global.FIXED_BEGIN);
104
+ const datasetEnd = dayjs.unix(context.FIXED_NOW);
105
+ const totalDuration = datasetEnd.diff(datasetStart);
106
+ // Clamp userPosition to [0, 1] to handle edge cases from rounding in time calculations
107
+ const userPosition = Math.max(0, Math.min(1, biasedCreated.diff(datasetStart) / totalDuration));
108
+
109
+ // Apply power function to bias toward recent (higher values)
110
+ // exponent < 1 shifts distribution toward 1 (recent)
111
+ const exponent = 1 - (bornRecentBias * 0.7); // 0.3 bias -> 0.79 exponent (gentle nudge)
112
+ const biasedPosition = Math.pow(userPosition, exponent);
113
+
114
+ // Convert back to timestamp
115
+ biasedCreated = datasetStart.add(biasedPosition * totalDuration, 'millisecond');
116
+ }
117
+
118
+ adjustedCreated = biasedCreated;
119
+ // Update user.created to match biased timestamp for profile consistency
120
+ user.created = adjustedCreated.toISOString();
121
+ } else {
122
+ adjustedCreated = dayjs.unix(global.FIXED_BEGIN);
123
+ }
79
124
 
80
125
  if (hasLocation) {
81
126
  const location = u.pickRandom(u.choose(defaults.locationsUsers));
@@ -89,10 +134,10 @@ export async function userLoop(context) {
89
134
 
90
135
  // Call user hook after profile creation
91
136
  if (config.hook) {
92
- await config.hook(profile, "user", {
93
- user,
137
+ await config.hook(profile, "user", {
138
+ user,
94
139
  config,
95
- userIsBornInDataset
140
+ userIsBornInDataset
96
141
  });
97
142
  }
98
143
 
@@ -152,9 +197,6 @@ export async function userLoop(context) {
152
197
  usersEvents = usersEvents.concat(data);
153
198
 
154
199
  if (!userConverted) {
155
- // if (verbose) {
156
- // u.progress([["users", context.getUserCount()], ["events", context.getEventCount()]]);
157
- // }
158
200
  return;
159
201
  }
160
202
  } else {
@@ -206,10 +248,6 @@ export async function userLoop(context) {
206
248
  }
207
249
 
208
250
  await eventData.hookPush(usersEvents, { profile });
209
-
210
- if (verbose) {
211
- // u.progress([["users", context.getUserCount()], ["events", context.getEventCount()]]);
212
- }
213
251
  });
214
252
 
215
253
  userPromises.push(userPromise);
@@ -233,4 +271,4 @@ export function matchConditions(profile, conditions) {
233
271
  if (profile[key] !== value) return false;
234
272
  }
235
273
  return true;
236
- }
274
+ }