make-mp-data 1.5.56 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/.claude/settings.local.json +21 -0
  2. package/.gcloudignore +2 -1
  3. package/.vscode/launch.json +6 -17
  4. package/.vscode/settings.json +31 -2
  5. package/dungeons/media.js +371 -0
  6. package/index.js +353 -1766
  7. package/{components → lib/cli}/cli.js +25 -6
  8. package/lib/cloud-function.js +20 -0
  9. package/lib/core/config-validator.js +248 -0
  10. package/lib/core/context.js +180 -0
  11. package/lib/core/storage.js +268 -0
  12. package/{components → lib/data}/defaults.js +17 -14
  13. package/lib/generators/adspend.js +133 -0
  14. package/lib/generators/events.js +242 -0
  15. package/lib/generators/funnels.js +330 -0
  16. package/lib/generators/mirror.js +168 -0
  17. package/lib/generators/profiles.js +93 -0
  18. package/lib/generators/scd.js +102 -0
  19. package/lib/orchestrators/mixpanel-sender.js +222 -0
  20. package/lib/orchestrators/user-loop.js +194 -0
  21. package/lib/orchestrators/worker-manager.js +200 -0
  22. package/{components → lib/utils}/ai.js +8 -36
  23. package/{components → lib/utils}/chart.js +9 -9
  24. package/{components → lib/utils}/project.js +4 -4
  25. package/{components → lib/utils}/utils.js +35 -23
  26. package/package.json +15 -15
  27. package/scripts/dana.mjs +137 -0
  28. package/scripts/new-dungeon.sh +7 -6
  29. package/scripts/update-deps.sh +2 -1
  30. package/tests/cli.test.js +28 -25
  31. package/tests/e2e.test.js +38 -36
  32. package/tests/int.test.js +151 -56
  33. package/tests/testSoup.mjs +1 -1
  34. package/tests/unit.test.js +15 -14
  35. package/tsconfig.json +1 -1
  36. package/types.d.ts +68 -11
  37. package/vitest.config.js +47 -0
  38. package/log.json +0 -1678
  39. package/tests/jest.config.js +0 -47
  40. /package/{components → lib/utils}/prompt.txt +0 -0
@@ -0,0 +1,168 @@
1
+ /**
2
+ * Mirror dataset generator module
3
+ * Creates mirror datasets in a future state with different transformation strategies
4
+ */
5
+
6
+ /** @typedef {import('../../types').Context} Context */
7
+
8
+ import dayjs from "dayjs";
9
+ import { clone } from "ak-tools";
10
+ import * as u from "../utils/utils.js";
11
+
12
/**
 * Builds the "mirror" copy of the event dataset.
 *
 * Every event in `storage.eventData` is pushed to `storage.mirrorEventData`
 * through `hookPush`. When at least one entry of `config.mirrorProps` applies
 * to the event (see `shouldProcessEvent`), the event is cloned once and each
 * applicable strategy is applied to that clone; otherwise the original event
 * object itself is pushed.
 *
 * @param {Context} context - Context object; `config.mirrorProps`, `storage.eventData`
 *   and `storage.mirrorEventData` are read from it.
 * @returns {Promise<void>} Resolves once every event has been pushed (or immediately
 *   when no mirror properties are configured).
 */
export async function makeMirror(context) {
	const { config, storage } = context;
	const { mirrorProps } = config;
	const { eventData, mirrorEventData } = storage;

	// Nothing to mirror: bail out before touching the event data.
	const hasMirrorProps = Boolean(mirrorProps) && Object.keys(mirrorProps).length > 0;
	if (!hasMirrorProps) {
		return;
	}

	const now = dayjs();

	for (const sourceEvent of eventData) {
		// Age of the event in whole days, handed to the strategy (used by "fill").
		const ageInDays = now.diff(dayjs(sourceEvent.time), "day");
		let mirrored = null;

		for (const propName in mirrorProps) {
			const {
				daysUnfilled = 7,
				events = "*",
				strategy = "create",
				values = []
			} = mirrorProps[propName];

			if (!shouldProcessEvent(sourceEvent.event, events)) continue;

			// Clone lazily so untouched events are pushed as-is.
			if (!mirrored) {
				mirrored = clone(sourceEvent);
			}

			applyMirrorStrategy(strategy, mirrored, sourceEvent, propName, values, ageInDays, daysUnfilled);
		}

		await mirrorEventData.hookPush(mirrored || sourceEvent);
	}
}
68
+
69
/**
 * Tells whether an event name passes a mirror-property event filter.
 * @param {string} eventName - Name of the event being considered
 * @param {string|Array} eventFilter - The wildcard "*" (matches everything) or a list of event names
 * @returns {boolean} True when the filter is "*" or is an array containing `eventName`;
 *   false for any other filter value
 */
function shouldProcessEvent(eventName, eventFilter) {
	const matchesEverything = eventFilter === "*";
	return matchesEverything || (Array.isArray(eventFilter) && eventFilter.includes(eventName));
}
86
+
87
/**
 * Applies one mirror strategy for one property, mutating the events in place.
 *
 * - "create": sets the property on `newEvent` to a chosen value.
 * - "delete": removes the property from `newEvent`.
 * - "fill":   when `delta >= daysUnfilled`, also sets the property on `oldEvent`;
 *             `newEvent` always receives a (separately) chosen value.
 * - "update": chooses a value for `newEvent` only when `oldEvent`'s value is falsy,
 *             otherwise copies `oldEvent`'s value across.
 *
 * @param {string} strategy - One of "create", "delete", "fill", "update"
 * @param {Object} newEvent - Mirror copy of the event (mutated)
 * @param {Object} oldEvent - Source event (mutated only by "fill")
 * @param {string} propName - Property the strategy operates on
 * @param {Array} values - Candidate values handed to `u.choose`
 * @param {number} delta - Days between the event time and now
 * @param {number} daysUnfilled - Age threshold (in days) used by "fill"
 * @throws {Error} When `strategy` is not one of the four known strategies
 */
function applyMirrorStrategy(strategy, newEvent, oldEvent, propName, values, delta, daysUnfilled) {
	if (strategy === "create") {
		newEvent[propName] = u.choose(values);
		return;
	}

	if (strategy === "delete") {
		delete newEvent[propName];
		return;
	}

	if (strategy === "fill") {
		// Sufficiently old source events get a value too; the mirror always does.
		const isOldEnough = delta >= daysUnfilled;
		if (isOldEnough) {
			oldEvent[propName] = u.choose(values);
		}
		newEvent[propName] = u.choose(values);
		return;
	}

	if (strategy === "update") {
		const existing = oldEvent[propName];
		newEvent[propName] = existing ? existing : u.choose(values);
		return;
	}

	throw new Error(`Unknown mirror strategy: ${strategy}`);
}
130
+
131
/**
 * Validates a mirror-properties configuration object.
 *
 * Each entry must be an object whose (defaulted) fields satisfy:
 * - `strategy` is one of "create", "delete", "fill", "update" (default "create");
 * - `values` is a non-empty array unless the strategy is "delete";
 * - `events` is "*" or a non-empty array (default "*");
 * - `daysUnfilled` is a non-negative number and not NaN (default 7).
 *
 * @param {Object} mirrorProps - Mirror properties configuration to validate
 * @returns {boolean} Always `true` when validation passes; a missing or non-object
 *   `mirrorProps` is accepted because mirror props are optional
 * @throws {Error} Describing the first invalid entry encountered
 */
export function validateMirrorProps(mirrorProps) {
	if (!mirrorProps || typeof mirrorProps !== 'object') {
		return true; // Mirror props are optional
	}

	const validStrategies = ['create', 'delete', 'fill', 'update'];

	for (const [propName, config] of Object.entries(mirrorProps)) {
		if (!config || typeof config !== 'object') {
			throw new Error(`Mirror property '${propName}' must have a configuration object`);
		}

		const { strategy = 'create', values = [], events = '*', daysUnfilled = 7 } = config;

		if (!validStrategies.includes(strategy)) {
			throw new Error(`Invalid mirror strategy '${strategy}' for property '${propName}'. Must be one of: ${validStrategies.join(', ')}`);
		}

		// "delete" ignores values; every other strategy needs something to choose from.
		if (strategy !== 'delete' && (!Array.isArray(values) || values.length === 0)) {
			throw new Error(`Mirror property '${propName}' with strategy '${strategy}' must have non-empty values array`);
		}

		if (events !== '*' && (!Array.isArray(events) || events.length === 0)) {
			throw new Error(`Mirror property '${propName}' events filter must be "*" or non-empty array`);
		}

		// `typeof NaN` is 'number' and `NaN < 0` is false, so NaN must be rejected explicitly.
		if (typeof daysUnfilled !== 'number' || Number.isNaN(daysUnfilled) || daysUnfilled < 0) {
			throw new Error(`Mirror property '${propName}' daysUnfilled must be a non-negative number`);
		}
	}

	return true;
}
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Profile generator module
3
+ * Creates user and group profiles with realistic properties
4
+ */
5
+
6
+ /** @typedef {import('../../types').Context} Context */
7
+
8
+ import * as u from "../utils/utils.js";
9
+
10
/**
 * Builds a single profile object by resolving each property through `u.choose`.
 *
 * The profile starts as a shallow copy of `defaults`; every default except the
 * identifier lists (`anonymousIds`, `sessionIds`) is resolved first, then every
 * key of `props` is resolved and written on top, so props win over defaults.
 * A failure while resolving one key is logged and that key is left as it was.
 *
 * @param {Context} context - Context object; only `incrementOperations()` is called on it
 * @param {Object} props - Properties to resolve into the profile (override defaults)
 * @param {Object} defaults - Base values the profile starts from
 * @returns {Promise<Object>} The generated profile
 */
export async function makeProfile(context, props = {}, defaults = {}) {
	// Count this call as one operation.
	context.incrementOperations();

	// Identifier lists are carried through untouched rather than resolved.
	const passthroughKeys = ["anonymousIds", "sessionIds"];

	const profile = { ...defaults };

	// Pass 1: resolve the defaults in place.
	const resolvableDefaults = Object.keys(profile).filter((key) => !passthroughKeys.includes(key));
	for (const key of resolvableDefaults) {
		try {
			profile[key] = u.choose(profile[key]);
		} catch (e) {
			// The unresolved default stays on the profile.
			console.error(`Error processing default property ${key}:`, e);
		}
	}

	// Pass 2: resolve the supplied props, overriding any default of the same name.
	for (const key in props) {
		try {
			profile[key] = u.choose(props[key]);
		} catch (e) {
			// Whatever the profile already held for this key (if anything) stays.
			console.error(`Error processing property ${key}:`, e);
		}
	}

	return profile;
}
51
+
52
/**
 * Builds a user profile from the configured user properties plus per-call overrides.
 * @param {Context} context - Context object; `config.userProps` is read from it
 * @param {Object} userProps - Per-call properties; these win over `config.userProps`
 * @param {Object} baseProfile - Defaults handed to `makeProfile`
 * @returns {Promise<Object>} Generated user profile
 */
export async function makeUserProfile(context, userProps = {}, baseProfile = {}) {
	const configuredProps = context.config.userProps;

	// Later spreads win, so explicit userProps override the configured ones.
	return makeProfile(context, { ...configuredProps, ...userProps }, baseProfile);
}
70
+
71
/**
 * Builds a group profile for one group key.
 *
 * Properties come from `config.groupProps[groupKey]` (when present), then the
 * per-call `groupProps`, and finally a `groupKey` property that always carries
 * the key passed in.
 *
 * @param {Context} context - Context object; `config.groupProps` is read from it
 * @param {string} groupKey - Group identifier
 * @param {Object} groupProps - Per-call properties; these win over the configured ones
 * @param {Object} baseProfile - Defaults handed to `makeProfile`
 * @returns {Promise<Object>} Generated group profile
 */
export async function makeGroupProfile(context, groupKey, groupProps = {}, baseProfile = {}) {
	const { groupProps: allConfiguredGroups } = context.config;

	// Fall back to an empty set when nothing is configured for this group.
	const configuredForGroup = allConfiguredGroups?.[groupKey] || {};

	const mergedProps = { ...configuredForGroup, ...groupProps, groupKey };

	return makeProfile(context, mergedProps, baseProfile);
}
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Slowly Changing Dimensions (SCD) generator module
3
+ * Creates time-series data showing how properties change over time
4
+ */
5
+
6
+ /** @typedef {import('../../types').Context} Context */
7
+
8
+ import dayjs from "dayjs";
9
+ import * as u from "../utils/utils.js";
10
+ import { makeProfile } from "./profiles.js";
11
+
12
/**
 * Creates SCD (Slowly Changing Dimensions) entries for one property of one entity.
 *
 * Starting at `created`, up to `mutations` entries are generated. Each entry is a
 * profile (via `makeProfile`) holding a chosen value for `scdKey`, the entity
 * identifier, a `startTime` and an `insertTime`. Generation stops early once the
 * running timestamp passes the current time. Entries whose `startTime` could not
 * be determined (unrecognised `timing`, or "fixed" timing with a `frequency` other
 * than "day" / "week" / "month") are skipped, and entries sharing a `startTime`
 * are collapsed to the first one.
 *
 * @param {Context} context - Context object, forwarded to `makeProfile`
 * @param {Array|Object} scdProp - SCD configuration, or a bare array of values
 *   (expanded to weekly / fuzzy / user-typed configuration)
 * @param {string} scdKey - Key name for the SCD property
 * @param {string} distinct_id - User/entity identifier
 * @param {number} mutations - Maximum number of entries to generate
 * @param {string | number} created - Creation timestamp for the entity; numbers are
 *   interpreted as unix seconds
 * @returns {Promise<Array>} Array of SCD entries, unique by `startTime`
 */
export async function makeSCD(context, scdProp, scdKey, distinct_id, mutations, created) {
	// A bare array is shorthand for a full configuration object.
	const scdConfig = Array.isArray(scdProp)
		? { values: scdProp, frequency: 'week', max: 10, timing: 'fuzzy', type: 'user' }
		: scdProp;
	const createdAt = typeof created === 'number' ? dayjs.unix(created).toISOString() : created;

	const { frequency, timing, values, type = "user" } = scdConfig;

	// Return empty array if no values provided
	if (JSON.stringify(values) === "{}" || JSON.stringify(values) === "[]") {
		return [];
	}

	const scdEntries = [];
	let lastInserted = dayjs(createdAt);
	const deltaDays = dayjs().diff(lastInserted, "day");
	// Users are keyed by distinct_id; any other entity type is keyed by its type name.
	const uuidKeyName = type === 'user' ? 'distinct_id' : type;

	for (let i = 0; i < mutations; i++) {
		// Stop if we've reached the current time
		if (lastInserted.isAfter(dayjs())) break;

		// Create profile with the SCD property
		const scd = await makeProfile(context, { [scdKey]: values }, { [uuidKeyName]: distinct_id });

		const scdEntry = {
			...scd,
			[uuidKeyName]: scd.distinct_id || distinct_id,
			startTime: null,
			insertTime: null
		};

		// "fixed" snaps the start to the beginning of the next period;
		// "fuzzy" uses the running timestamp as-is.
		if (timing === 'fixed') {
			switch (frequency) {
				case "day":
					scdEntry.startTime = lastInserted.add(1, "day").startOf('day').toISOString();
					break;
				case "week":
					scdEntry.startTime = lastInserted.add(1, "week").startOf('week').toISOString();
					break;
				case "month":
					scdEntry.startTime = lastInserted.add(1, "month").startOf('month').toISOString();
					break;
			}
		}

		if (timing === 'fuzzy') {
			scdEntry.startTime = lastInserted.toISOString();
		}

		// Insert time is the running timestamp plus a random 1-9000 second offset.
		const insertTime = lastInserted.add(u.integer(1, 9000), "seconds");
		scdEntry.insertTime = insertTime.toISOString();

		// Both keys always exist on the entry (initialised to null above), so test the
		// values rather than key presence; an entry without a start time is not usable.
		if (scdEntry.insertTime !== null && scdEntry.startTime !== null) {
			scdEntries.push(scdEntry);
		}

		// Advance time for next entry
		lastInserted = lastInserted
			.add(u.integer(0, deltaDays), "day")
			.subtract(u.integer(1, 9000), "seconds");
	}

	// De-duplicate on startTime, keeping the first entry seen for each value.
	const seenStartTimes = new Set();
	const deduped = scdEntries.filter((entry) => {
		if (seenStartTimes.has(entry.startTime)) return false;
		seenStartTimes.add(entry.startTime);
		return true;
	});

	return deduped;
}
@@ -0,0 +1,222 @@
1
+ /**
2
+ * Mixpanel Sender Orchestrator module
3
+ * Handles sending all data types to Mixpanel
4
+ */
5
+
6
+ /** @typedef {import('../../types').Context} Context */
7
+
8
+ import dayjs from "dayjs";
9
+ import path from "path";
10
+ import { clone, comma, ls, rm } from "ak-tools";
11
+ import mp from "mixpanel-import";
12
+
13
/**
 * Sends every generated dataset held in `context.storage` to Mixpanel using
 * `mixpanel-import`.
 *
 * Imports run sequentially in this order: events, user profiles, ad spend,
 * group profiles (one import per group entity), group events, and — only when
 * `serviceAccount`, `projectId` and `serviceSecret` are all configured — SCD
 * tables (one import per SCD entity). In batch mode each import reads the
 * matching files next to the dataset's write path instead of the in-memory data,
 * and when `writeToDisk` is false those files are removed afterwards.
 *
 * @param {Context} context - Context object containing config, storage, etc.
 * @returns {Promise<Object>} Import results keyed by data type (`events`, `users`,
 *   `groups`, plus `adSpend`, `groupEvents` and `<scdKey>_scd` when those imports ran)
 */
export async function sendToMixpanel(context) {
	const { config, storage } = context;
	const {
		adSpendData,
		eventData,
		groupProfilesData,
		scdTableData,
		userProfilesData,
		groupEventData
	} = storage;

	const {
		token,
		region,
		writeToDisk = true,
		format,
		serviceAccount,
		projectId,
		serviceSecret
	} = config;

	const importResults = { events: {}, users: {}, groups: [] };
	const isBATCH_MODE = context.isBatchMode();
	const isCLI = context.isCLI();
	const NODE_ENV = process.env.NODE_ENV || "unknown";

	/** @type {import('mixpanel-import').Creds} */
	const creds = { token };
	const mpImportFormat = format === "json" ? "jsonl" : "csv";

	/** @type {import('mixpanel-import').Options} */
	const commonOpts = {
		region,
		fixData: true,
		verbose: false,
		forceStream: true,
		strict: true,
		epochEnd: dayjs().unix(),
		dryRun: false,
		abridged: false,
		fixJson: true,
		showProgress: NODE_ENV === "dev",
		streamFormat: mpImportFormat
	};

	// The CLI always shows progress, regardless of NODE_ENV.
	if (isCLI) commonOpts.showProgress = true;

	// Import events
	if (eventData || isBATCH_MODE) {
		log(`importing events to mixpanel...\n`);
		const eventDataToImport = await resolveImportSource(eventData, '-EVENTS-', isBATCH_MODE);
		const imported = await mp(creds, eventDataToImport, {
			recordType: "event",
			...commonOpts,
		});
		log(`\tsent ${comma(imported.success)} events\n`);
		importResults.events = imported;
	}

	// Import user profiles
	if (userProfilesData || isBATCH_MODE) {
		log(`importing user profiles to mixpanel...\n`);
		const userProfilesToImport = await resolveImportSource(userProfilesData, '-USERS-', isBATCH_MODE);
		const imported = await mp(creds, userProfilesToImport, {
			recordType: "user",
			...commonOpts,
		});
		log(`\tsent ${comma(imported.success)} user profiles\n`);
		importResults.users = imported;
	}

	// Import ad spend data (guarded by the ad spend dataset itself, not the group events)
	if (adSpendData || isBATCH_MODE) {
		log(`importing ad spend data to mixpanel...\n`);
		const adSpendDataToImport = await resolveImportSource(adSpendData, '-AD-SPEND-', isBATCH_MODE);
		const imported = await mp(creds, adSpendDataToImport, {
			recordType: "event",
			...commonOpts,
		});
		log(`\tsent ${comma(imported.success)} ad spend events\n`);
		importResults.adSpend = imported;
	}

	// Import group profiles
	if (groupProfilesData || isBATCH_MODE) {
		// Batch mode can reach this point without any group datasets; iterate nothing then.
		for (const groupEntity of groupProfilesData ?? []) {
			const groupKey = groupEntity?.groupKey;
			log(`importing ${groupKey} profiles to mixpanel...\n`);
			const groupProfilesToImport = await resolveImportSource(groupEntity, `-GROUPS-${groupKey}`, isBATCH_MODE);
			const imported = await mp({ token, groupKey }, groupProfilesToImport, {
				recordType: "group",
				...commonOpts,
			});
			log(`\tsent ${comma(imported.success)} ${groupKey} profiles\n`);
			importResults.groups.push(imported);
		}
	}

	// Import group events
	if (groupEventData || isBATCH_MODE) {
		log(`importing group events to mixpanel...\n`);
		const groupEventDataToImport = await resolveImportSource(groupEventData, '-GROUP-EVENTS-', isBATCH_MODE);
		const imported = await mp(creds, groupEventDataToImport, {
			recordType: "event",
			...commonOpts,
			strict: false
		});
		log(`\tsent ${comma(imported.success)} group events\n`);
		importResults.groupEvents = imported;
	}

	// Import SCD data (requires service account)
	if (serviceAccount && projectId && serviceSecret) {
		if (scdTableData || isBATCH_MODE) {
			log(`importing SCD data to mixpanel...\n`);
			for (const scdEntity of scdTableData ?? []) {
				const scdKey = scdEntity?.scdKey;
				log(`importing ${scdKey} SCD data to mixpanel...\n`);
				const scdDataToImport = await resolveImportSource(scdEntity, `-SCD-${scdKey}`, isBATCH_MODE);

				/** @type {import('mixpanel-import').Options} */
				const options = {
					recordType: "scd",
					scdKey,
					scdType: scdEntity.dataType,
					scdLabel: `${scdKey}-scd`,
					...commonOpts,
				};

				// Non-user SCDs are imported against the group key named by the entity type.
				if (scdEntity.entityType !== "user") options.groupKey = scdEntity.entityType;

				const imported = await mp(
					{
						token,
						acct: serviceAccount,
						pass: serviceSecret,
						project: projectId
					},
					scdDataToImport,
					options
				);
				log(`\tsent ${comma(imported.success)} ${scdKey} SCD data\n`);
				importResults[`${scdKey}_scd`] = imported;
			}
		}
	}

	// Clean up batch files if needed
	if (!writeToDisk && isBATCH_MODE) {
		const writeDir = eventData?.getWriteDir() || userProfilesData?.getWriteDir();
		// Without a known write path there is nothing to list or remove.
		if (writeDir) {
			const listDir = await ls(writeDir.split(path.basename(writeDir)).join(""));
			const files = listDir.filter(f =>
				f.includes('-EVENTS-') ||
				f.includes('-USERS-') ||
				f.includes('-AD-SPEND-') ||
				f.includes('-GROUPS-') ||
				f.includes('-GROUP-EVENTS-')
			);
			for (const file of files) {
				await rm(file);
			}
		}
	}

	return importResults;
}

/**
 * Picks what to hand to `mixpanel-import` for one dataset.
 *
 * Outside batch mode this is a clone of the in-memory dataset. In batch mode it is
 * the list of paths, found alongside the dataset's write path, whose name contains
 * `fileMarker`.
 *
 * @param {Object} dataset - Storage dataset; must expose `getWriteDir()` in batch mode
 * @param {string} fileMarker - Substring identifying this dataset's batch files
 * @param {boolean} isBatchMode - Whether batch mode is active
 * @returns {Promise<*>} Cloned dataset, or an array of matching file paths
 */
async function resolveImportSource(dataset, fileMarker, isBatchMode) {
	if (!isBatchMode) return clone(dataset);

	const writeDir = dataset.getWriteDir();
	// Removing the basename from the write path leaves the containing directory (with its trailing separator).
	const siblingFiles = await ls(writeDir.split(path.basename(writeDir)).join(""));
	return siblingFiles.filter(f => f.includes(fileMarker));
}
215
+
216
/**
 * Writes a message to the console.
 * @param {string} message - Text to print, passed unchanged to `console.log`
 */
function log(message) {
	const { log: emit } = console;
	emit.call(console, message);
}