make-mp-data 1.5.51 → 1.5.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -193,9 +193,20 @@ function choose(value) {
193
193
  return chance.pickone(value);
194
194
  }
195
195
 
196
- // [[{}],[{}],[{}]] should return all
197
- if (Array.isArray(value) && typeof value[0] === "object" && hasSameKeys(value)) {
198
- return value;
196
+ // Now, if the resolved value is an array, use chance.pickone
197
+ if (Array.isArray(value) && value.every(item => typeof item === 'string')) {
198
+ return chance.pickone(value);
199
+ }
200
+
201
+ if (Array.isArray(value) && value.every(item => typeof item === 'number')) {
202
+ return chance.pickone(value);
203
+ }
204
+
205
+ if (Array.isArray(value) && value.every(item => typeof item === 'object')) {
206
+ if (hasSameKeys(value)) return value;
207
+ else {
208
+ if (process.env.NODE_ENV === "dev") debugger;
209
+ }
199
210
  }
200
211
 
201
212
  // ["","",""] should pick-a-winner
@@ -234,27 +245,27 @@ function choose(value) {
234
245
 
235
246
  function hasSameKeys(arr) {
236
247
  if (arr.length <= 1) {
237
- return true; // An empty array or an array with one object always has the same keys
248
+ return true; // An empty array or an array with one object always has the same keys
238
249
  }
239
-
250
+
240
251
  const firstKeys = Object.keys(arr[0]);
241
-
252
+
242
253
  for (let i = 1; i < arr.length; i++) {
243
- const currentKeys = Object.keys(arr[i]);
244
-
245
- if (currentKeys.length !== firstKeys.length) {
246
- return false; // Different number of keys
247
- }
248
-
249
- for (const key of firstKeys) {
250
- if (!currentKeys.includes(key)) {
251
- return false; // Key missing in current object
254
+ const currentKeys = Object.keys(arr[i]);
255
+
256
+ if (currentKeys.length !== firstKeys.length) {
257
+ return false; // Different number of keys
258
+ }
259
+
260
+ for (const key of firstKeys) {
261
+ if (!currentKeys.includes(key)) {
262
+ return false; // Key missing in current object
263
+ }
252
264
  }
253
- }
254
265
  }
255
-
266
+
256
267
  return true; // All objects have the same keys
257
- }
268
+ }
258
269
 
259
270
  /**
260
271
  * keeps picking from an array until the array is exhausted
File without changes
@@ -0,0 +1,288 @@
1
+ const run = require("../index.js");
2
+ const SEED = "my-seed";
3
+ const dayjs = require("dayjs");
4
+ const utc = require("dayjs/plugin/utc");
5
+ dayjs.extend(utc);
6
+ require("dotenv").config();
7
+ const u = require("../components/utils");
8
+ const v = require("ak-tools");
9
+ const { projectId, serviceAccount, serviceSecret } = require("./adspend");
10
+ const chance = u.initChance(SEED);
11
+ const num_users = 1_500;
12
+ const days = 91;
13
+
14
+ const GOVERNANCE_TOKEN = process.env.GOVERNANCE_TOKEN;
15
+ const GOVERNANCE_ID = process.env.GOVERNANCE_ID;
16
+ const GOVERNANCE_ACCT = process.env.GOVERNANCE_ACCT;
17
+ const GOVERNANCE_SECRET = process.env.GOVERNANCE_SECRET;
18
+
19
+ const URLS = [
20
+ "/",
21
+ "/refinance-student-loan/",
22
+ "/private-student-loans/",
23
+ "/personal-loans/",
24
+ "/home-loans/",
25
+ "/invest/",
26
+ "/banking/",
27
+ "/relay/",
28
+ "/life-insurance/",
29
+ "/learn/",
30
+ "/faq/",
31
+ "/career-advisory/",
32
+ "/member-benefits/",
33
+ "/experiences/",
34
+ "/member-stories/",
35
+ "/contact-us/",
36
+ "/jobs/",
37
+ "/on-the-money/",
38
+ "/about-sofi/",
39
+ "/management-team/",
40
+ "/eligibility-criteria/",
41
+ "/referral-program/",
42
+ "/sofi-at-work/"
43
+ ];
44
+
45
+ /** @type {Config} */
46
+ const commonOpts = {
47
+ verbose: true,
48
+ token: GOVERNANCE_TOKEN,
49
+ projectId: GOVERNANCE_ID,
50
+ serviceAccount: GOVERNANCE_ACCT,
51
+ serviceSecret: GOVERNANCE_SECRET,
52
+ numDays: days,
53
+ numEvents: num_users * 100,
54
+ numUsers: num_users,
55
+ hasAnonIds: false,
56
+ hasSessionIds: true,
57
+ format: "json",
58
+ alsoInferFunnels: true,
59
+ hasLocation: true,
60
+ hasAndroidDevices: false,
61
+ hasIOSDevices: false,
62
+ hasDesktopDevices: false,
63
+ hasBrowser: true,
64
+ hasCampaigns: false,
65
+ isAnonymous: false,
66
+ hasAdSpend: false,
67
+
68
+ hasAvatar: true,
69
+ makeChart: false,
70
+
71
+ batchSize: 1_500_000,
72
+ concurrency: 1,
73
+ writeToDisk: false,
74
+ percentUsersBornInDataset: 15
75
+
76
+ };
77
+
78
+ /** @typedef {import("../types").Dungeon} Config */
79
+
80
+ /** @type {Config} */
81
+ const diffNameSameMeaning = {
82
+ ...commonOpts,
83
+ name: "same-meaning",
84
+ hasAdSpend: true,
85
+ seed: "foo",
86
+ description: "A dungeon with really unclear event names",
87
+ funnels: [],
88
+ events: [
89
+ //@ts-ignore
90
+ " login", "login ", " sign in", "login", "Log In", "Sign In", "sign in", "signin", "page view", "page viewed", "viewed page", "view page", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "page_viewed", "page"
91
+ ],
92
+ superProps: {
93
+ "theme": "diff name; same meaning",
94
+ "url": u.pickAWinner(URLS, 0)
95
+ },
96
+ userProps: {},
97
+ hook: (a, type) => {
98
+ if (type === "everything") {
99
+ return a;
100
+ }
101
+
102
+ return a;
103
+ }
104
+ };
105
+
106
+
107
+ /** @type {Config} */
108
+ const sameNameDiffMeaning = {
109
+ ...commonOpts,
110
+ name: "diff-meanings",
111
+ seed: "bar",
112
+ description: "a dataset with a single event that means different things",
113
+ funnels: [],
114
+ events: [
115
+ {
116
+ event: "button click",
117
+ properties: {
118
+ "button name": ["start now", "register", "submit", "learn more", "try now", "free trial"],
119
+ "form name": u.pickAWinner(["mortgage", "student loan", "personal loan ", "credit card ", "savings account", "subscribe form"]),
120
+ "cta": ["free for 30d", "decision in 5min", "chat with a human", "try it in the app"],
121
+ }
122
+ }
123
+ ],
124
+ superProps: {
125
+ "theme": "same name; diff meaning",
126
+ "url": u.pickAWinner(URLS, 0)
127
+ },
128
+ userProps: {},
129
+ scdProps: {},
130
+ hook: (a, type) => {
131
+ if (type === "everything") {
132
+ return a;
133
+ }
134
+
135
+ return a;
136
+ }
137
+ };
138
+
139
+ /** @type {Config} */
140
+ const Borrowing = {
141
+ ...commonOpts,
142
+ percentUsersBornInDataset: 100,
143
+ alsoInferFunnels: false,
144
+ name: "borrowing",
145
+ seed: "baz",
146
+ description: "a dataset which requires borrowing",
147
+ funnels: [
148
+ {
149
+ "sequence": ["page viewed", "click!", "sign up"],
150
+ "isFirstFunnel": true,
151
+ "order": "sequential"
152
+ },
153
+ {
154
+ "sequence": ["start application", "submit application"],
155
+ "props": {
156
+ "product": u.pickAWinner(["mortgage", "student loan", "personal loan ", "credit card ", "savings account", "investing account"]),
157
+ "application Id": () => { return v.uid(10); },
158
+ }
159
+ }
160
+ ],
161
+ events: [
162
+ {
163
+ event: "page viewed",
164
+ properties: {
165
+ "marketing channel": u.pickAWinner(["organic", "meta", "google", "x", "snapchat", "youtube", "instagram"], 0),
166
+ }
167
+ },
168
+ {
169
+ event: "click!",
170
+ properties: {
171
+ "CTA clicked": u.pickAWinner(["start now", "register", "submit", "learn more", "try now", "free trial"]),
172
+ }
173
+ },
174
+ {
175
+ event: "sign up",
176
+ properties: {
177
+ "A/B test": ["fast onboarding", "custom colors", "localization", "new design"],
178
+ "Variant": ["A", "B", "C", "Control"]
179
+ },
180
+ isFirstEvent: true
181
+ },
182
+ {
183
+ event: "start application",
184
+
185
+ },
186
+ {
187
+ event: "submit application",
188
+ },
189
+ ],
190
+ superProps: {
191
+ "theme": "requires borrows",
192
+ "url": u.pickAWinner(URLS, 0)
193
+ },
194
+ userProps: {},
195
+ scdProps: {},
196
+ hook: (a, type) => {
197
+ if (type === "everything") {
198
+ return a;
199
+ }
200
+
201
+ return a;
202
+ }
203
+ };
204
+
205
+
206
+ /** @type {Config} */
207
+ const Modeling = {
208
+ ...commonOpts,
209
+ percentUsersBornInDataset: 100,
210
+ alsoInferFunnels: false,
211
+ name: "bad shapes",
212
+ seed: "qux",
213
+ description: "a data set with bad modeling",
214
+ funnels: [
215
+
216
+ ],
217
+ events: [
218
+ {
219
+ event: "nested nightmare",
220
+ properties: {
221
+ data: buildAStupidNestedObject
222
+ }
223
+ },
224
+ ],
225
+ superProps: {
226
+ "theme": "deeply nested",
227
+ "url": u.pickAWinner(URLS, 0)
228
+ },
229
+ userProps: {},
230
+ scdProps: {},
231
+ hook: (a, type) => {
232
+ if (type === "everything") {
233
+ return a;
234
+ }
235
+
236
+ return a;
237
+ }
238
+ };
239
+
240
+ const metaValues = u.pickAWinner(["foo", "bar", "baz", "qux", "quux", "corge", "grault", "garply", "waldo", "fred", "plugh", "xyzzy", "thud"]);
241
/**
 * Builds an object with 1-5 randomly named keys. Each key either holds a
 * nested object produced by a recursive call (key shaped like "A>1") or a
 * value obtained from `u.choose(metaValues)` (key shaped like "A|1").
 * Keys are drawn at random, so a later key may overwrite an earlier one.
 *
 * @param {number} [depth=0] nesting level of the object being built; nesting
 *   is only attempted while `depth <= 3`
 * @returns {Object} the generated object
 */
function buildAStupidNestedObject(depth = 0) {
	const obj = {};
	const keyPrefix = ["A", "B", "C", "D", "E", "F", "G", "H", "I"];
	const keySuffix = ["1", "2", "3", "4", "5", "6", "7", "8", "9"];

	const numKeys = chance.integer({ min: 1, max: 5 });
	for (let i = 0; i < numKeys; i++) {
		if (depth <= 3 && chance.bool({ likelihood: 10 })) {
			const key = chance.pickone(keyPrefix) + ">" + chance.pickone(keySuffix);
			// Hand the incremented depth down; without it every recursive call
			// restarts at 0 and the depth guard above never limits nesting.
			obj[key] = buildAStupidNestedObject(depth + 1);
		}
		else {
			const key = chance.pickone(keyPrefix) + "|" + chance.pickone(keySuffix);
			const value = u.choose(metaValues);
			obj[key] = value;
		}
	}
	return obj;
}
260
+
261
+
262
+ const dungeons = [
263
+ diffNameSameMeaning,
264
+ sameNameDiffMeaning,
265
+ Borrowing,
266
+ Modeling
267
+ ];
268
+
269
/**
 * Runs every config in `dungeons` through `run`, one at a time and in list
 * order, waiting for each run to settle before starting the next.
 *
 * @returns {Promise<Array>} the resolved value of each run, in list order;
 *   rejects as soon as any single run rejects
 */
async function runDungeons() {
	const finished = [];
	for (const config of dungeons) {
		finished.push(await run(config));
	}
	return finished;
}
277
+
278
// Script entry point: run every dungeon, then pause for inspection when a
// debugger is attached (the `debugger` statements are no-ops otherwise).
runDungeons()
	.then(results => {
		debugger;
	})
	.catch(e => {
		console.error(e);
		// Report the failure to the calling shell/CI instead of exiting with status 0.
		process.exitCode = 1;
		debugger;
	});
287
+
288
+ module.exports = {};
package/index.js CHANGED
@@ -124,8 +124,8 @@ async function main(config) {
124
124
  const scdTableKeys = Object.keys(scdProps);
125
125
  const scdTableData = await Promise.all(scdTableKeys.map(async (key) =>
126
126
  //todo don't assume everything is a string... lol
127
- // @ts-ignore
128
- await makeHookArray([], { hook, type: "scd", config, format, scdKey: key, entityType: config.scdProps[key].type, dataType: "string", filepath: `${simulationName}-${scdProps[key]?.type || "user"}-SCD-${key}` })
127
+ // @ts-ignore
128
+ await makeHookArray([], { hook, type: "scd", config, format, scdKey: key, entityType: config.scdProps[key]?.type || "user", dataType: "string", filepath: `${simulationName}-${scdProps[key]?.type || "user"}-SCD-${key}` })
129
129
  ));
130
130
  const groupTableKeys = Object.keys(groupKeys);
131
131
  const groupProfilesData = await Promise.all(groupTableKeys.map(async (key, index) => {
@@ -431,16 +431,20 @@ async function makeEvent(distinct_id, earliestTime, chosenEvent, anonymousIds, s
431
431
  let defaultProps = {};
432
432
  let devicePool = [];
433
433
 
434
- if (hasLocation) defaultProps.location = DEFAULTS.locationsEvents();
435
- if (hasBrowser) defaultProps.browser = DEFAULTS.browsers();
434
+ if (hasLocation) defaultProps.location = u.shuffleArray(DEFAULTS.locationsEvents()).pop();
435
+ if (hasBrowser) defaultProps.browser = u.choose(DEFAULTS.browsers());
436
436
  if (hasAndroidDevices) devicePool.push(DEFAULTS.androidDevices());
437
437
  if (hasIOSDevices) devicePool.push(DEFAULTS.iOSDevices());
438
438
  if (hasDesktopDevices) devicePool.push(DEFAULTS.desktopDevices());
439
439
 
440
440
  // we don't always have campaigns, because of attribution
441
- if (hasCampaigns && chance.bool({ likelihood: 25 })) defaultProps.campaigns = DEFAULTS.campaigns();
441
+ if (hasCampaigns && chance.bool({ likelihood: 25 })) defaultProps.campaigns = u.shuffleArray(DEFAULTS.campaigns()).pop();
442
442
  const devices = devicePool.flat();
443
- if (devices.length) defaultProps.device = devices;
443
+ if (devices.length) defaultProps.device = u.shuffleArray(devices).pop();
444
+
445
+
446
+
447
+
444
448
 
445
449
 
446
450
  //event time
@@ -505,6 +509,33 @@ async function makeEvent(distinct_id, earliestTime, chosenEvent, anonymousIds, s
505
509
  }
506
510
  }
507
511
  }
512
+ else if (typeof (defaultProps[key]) === "object") {
513
+ const obj = defaultProps[key];
514
+ for (const subKey in obj) {
515
+ if (Array.isArray(obj[subKey])) {
516
+ const subChoice = u.choose(obj[subKey]);
517
+ if (Array.isArray(subChoice)) {
518
+ for (const subSubChoice of subChoice) {
519
+ if (!eventTemplate[subKey]) eventTemplate[subKey] = subSubChoice;
520
+ }
521
+ }
522
+ else if (typeof subChoice === "object") {
523
+ for (const subSubKey in subChoice) {
524
+ if (!eventTemplate[subSubKey]) eventTemplate[subSubKey] = subChoice[subSubKey];
525
+ }
526
+ }
527
+ else {
528
+ if (!eventTemplate[subKey]) eventTemplate[subKey] = subChoice;
529
+ }
530
+ }
531
+ else {
532
+ if (!eventTemplate[subKey]) eventTemplate[subKey] = obj[subKey];
533
+ }
534
+ }
535
+ }
536
+ else {
537
+ if (!eventTemplate[key]) eventTemplate[key] = defaultProps[key];
538
+ }
508
539
  }
509
540
  }
510
541
 
@@ -725,12 +756,14 @@ async function makeFunnel(funnel, user, firstEventTime, profile, scd, config) {
725
756
  */
726
757
  async function makeProfile(props, defaults) {
727
758
  operations++;
759
+ const keysToNotChoose = ["anonymousIds", "sessionIds"];
728
760
 
729
761
  const profile = {
730
762
  ...defaults,
731
763
  };
732
764
 
733
765
  for (const key in profile) {
766
+ if (keysToNotChoose.includes(key)) continue;
734
767
  try {
735
768
  profile[key] = u.choose(profile[key]);
736
769
  }
@@ -763,7 +796,7 @@ async function makeProfile(props, defaults) {
763
796
  */
764
797
  async function makeSCD(scdProp, scdKey, distinct_id, mutations, created) {
765
798
  if (Array.isArray(scdProp)) scdProp = { values: scdProp, frequency: 'week', max: 10, timing: 'fuzzy', type: 'user' };
766
- const { frequency, max, timing, values, type } = scdProp;
799
+ const { frequency, max, timing, values, type = "user" } = scdProp;
767
800
  if (JSON.stringify(values) === "{}" || JSON.stringify(values) === "[]") return [];
768
801
  const scdEntries = [];
769
802
  let lastInserted = dayjs(created);
@@ -999,7 +1032,7 @@ async function userLoop(config, storage, concurrency = 1) {
999
1032
 
1000
1033
 
1001
1034
  // SCD creation
1002
- const scdUserTables = t.objFilter(scdProps, (scd) => scd.type === 'user');
1035
+ const scdUserTables = t.objFilter(scdProps, (scd) => scd.type === 'user' || !scd.type);
1003
1036
  const scdTableKeys = Object.keys(scdUserTables);
1004
1037
 
1005
1038
 
@@ -1338,6 +1371,11 @@ function validateDungeonConfig(config) {
1338
1371
  config.simulationName = name || makeName();
1339
1372
  config.name = config.simulationName;
1340
1373
 
1374
+ //events
1375
+ if (!events || !events.length) events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }];
1376
+ // @ts-ignore
1377
+ if (typeof events[0] === "string") events = events.map(e => ({ event: e }));
1378
+
1341
1379
  //max batch size
1342
1380
  if (batchSize > 0) BATCH_SIZE = batchSize;
1343
1381
 
@@ -1664,9 +1702,9 @@ if (NODE_ENV !== "prod") {
1664
1702
  bytes: bytesHuman(bytes || 0),
1665
1703
  };
1666
1704
  if (bytes > 0) console.table(stats);
1667
- if (Object.keys(data?.importResults).length) {
1668
- log(`\nlog written to log.json\n`);
1669
- writeFileSync(path.resolve(folder, "log.json"), JSON.stringify(data?.importResults, null, 2));
1705
+ if (Object.keys(data?.importResults || {}).length) {
1706
+ log(`\nlog written to log.json\n`);
1707
+ writeFileSync(path.resolve(folder, "log.json"), JSON.stringify(data?.importResults, null, 2));
1670
1708
  }
1671
1709
  // log(" " + files?.flat().join("\n "));
1672
1710
  log(`\n----------------SUMMARY-----------------\n\n\n`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "make-mp-data",
3
- "version": "1.5.051",
3
+ "version": "1.5.053",
4
4
  "description": "builds all mixpanel primitives for a given project",
5
5
  "main": "index.js",
6
6
  "types": "types.d.ts",
package/scratch.mjs CHANGED
@@ -13,17 +13,17 @@ TO DOs
13
13
 
14
14
  import main from "./index.js";
15
15
 
16
- import simple from './schemas/simple.js';
17
- import funnels from './schemas/funnels.js';
18
- import foobar from './schemas/foobar.js';
19
- import complex from './schemas/complex.js';
20
- import adspend from './schemas/adspend.js'
16
+ import simple from './dungeons/simple.js';
17
+ import funnels from './dungeons/funnels.js';
18
+ import foobar from './dungeons/foobar.js';
19
+ import complex from './dungeons/complex.js';
20
+ import adspend from './dungeons/adspend.js'
21
21
 
22
- import anon from './schemas/anon.js';
22
+ import anon from './dungeons/anon.js';
23
23
  import execSync from 'child_process';
24
- import mirror from './schemas/mirror.js'
24
+ import mirror from './dungeons/mirror.js'
25
25
  // import mds from './dungeons/modern-data-stack.js'
26
- import big from './schemas/big.js'
26
+ import big from './dungeons/big.js'
27
27
 
28
28
  const numEvents = 1000;
29
29
 
@@ -33,6 +33,8 @@ const spec = {
33
33
  writeToDisk: true,
34
34
  verbose: true,
35
35
  makeChart: false,
36
+ hasAnonIds: true,
37
+ hasSessionIds: true
36
38
  // format: "csv",
37
39
  // numEvents,
38
40
  // numUsers: numEvents / 100,
package/types.d.ts CHANGED
@@ -73,7 +73,7 @@ declare namespace main {
73
73
  }
74
74
 
75
75
  export type complexSCDProp = {
76
- type: string;
76
+ type?: string | "user_id" | "company_id";
77
77
  frequency: "day" | "week" | "month" | "year";
78
78
  values: ValueValid;
79
79
  timing: "fixed" | "fuzzy";