make-mp-data 1.5.52 → 1.5.54

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,288 @@
1
+ const run = require("../index.js");
2
+ const SEED = "my-seed";
3
+ const dayjs = require("dayjs");
4
+ const utc = require("dayjs/plugin/utc");
5
+ dayjs.extend(utc);
6
+ require("dotenv").config();
7
+ const u = require("../components/utils");
8
+ const v = require("ak-tools");
9
+ const { projectId, serviceAccount, serviceSecret } = require("./adspend");
10
+ const chance = u.initChance(SEED);
11
+ const num_users = 1_500;
12
+ const days = 91;
13
+
14
+ const GOVERNANCE_TOKEN = process.env.GOVERNANCE_TOKEN;
15
+ const GOVERNANCE_ID = process.env.GOVERNANCE_ID;
16
+ const GOVERNANCE_ACCT = process.env.GOVERNANCE_ACCT;
17
+ const GOVERNANCE_SECRET = process.env.GOVERNANCE_SECRET;
18
+
19
// Site-relative URL paths. Every dungeon in this file passes this list to
// u.pickAWinner(URLS, 0) to populate its "url" super property.
+ const URLS = [
20
+ "/",
21
+ "/refinance-student-loan/",
22
+ "/private-student-loans/",
23
+ "/personal-loans/",
24
+ "/home-loans/",
25
+ "/invest/",
26
+ "/banking/",
27
+ "/relay/",
28
+ "/life-insurance/",
29
+ "/learn/",
30
+ "/faq/",
31
+ "/career-advisory/",
32
+ "/member-benefits/",
33
+ "/experiences/",
34
+ "/member-stories/",
35
+ "/contact-us/",
36
+ "/jobs/",
37
+ "/on-the-money/",
38
+ "/about-sofi/",
39
+ "/management-team/",
40
+ "/eligibility-criteria/",
41
+ "/referral-program/",
42
+ "/sofi-at-work/"
43
+ ];
44
+
45
// Options shared by every dungeon in this file: each config below spreads
// this object first and then overrides individual keys.
+ /** @type {Config} */
45
+ const commonOpts = {
46
+ verbose: true,
47
// Credentials are read from the GOVERNANCE_* environment variables above.
+ token: GOVERNANCE_TOKEN,
48
+ projectId: GOVERNANCE_ID,
49
+ serviceAccount: GOVERNANCE_ACCT,
50
+ serviceSecret: GOVERNANCE_SECRET,
51
+ numDays: days,
52
// 1_500 users * 100 = 150_000 events
+ numEvents: num_users * 100,
53
+ numUsers: num_users,
54
+ hasAnonIds: false,
55
+ hasSessionIds: true,
56
+ format: "json",
57
+ alsoInferFunnels: true,
58
+ hasLocation: true,
59
+ hasAndroidDevices: false,
60
+ hasIOSDevices: false,
61
+ hasDesktopDevices: false,
62
+ hasBrowser: true,
63
+ hasCampaigns: false,
64
+ isAnonymous: false,
65
+ hasAdSpend: false,
66
+
67
+ hasAvatar: true,
68
+ makeChart: false,
69
+
70
+ batchSize: 1_500_000,
71
+ concurrency: 1,
72
+ writeToDisk: false,
73
+ percentUsersBornInDataset: 15
74
+
75
+ };
77
+
78
+ /** @typedef {import("../types").Dungeon} Config */
79
+
80
// Dungeon "same-meaning": the events are bare strings that are spelling,
// casing and whitespace variants of "login" / "sign in" / "page view".
// Overrides hasAdSpend from commonOpts to true.
+ /** @type {Config} */
81
+ const diffNameSameMeaning = {
82
+ ...commonOpts,
83
+ name: "same-meaning",
84
+ hasAdSpend: true,
85
+ seed: "foo",
86
+ description: "A dungeon with really unclear event names",
87
+ funnels: [],
88
+ events: [
89
// Strings rather than event objects, hence the ts-ignore.
// NOTE(review): many names are repeated; presumably the repetition weights
// how often each spelling is picked -- confirm against the generator.
+ //@ts-ignore
90
+ " login", "login ", " sign in", "login", "Log In", "Sign In", "sign in", "signin", "page view", "page viewed", "viewed page", "view page", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "pageviewed", "page viewed", "page view", "pageview", "page viewed", "page view", "page_viewed", "page"
91
+ ],
92
+ superProps: {
93
+ "theme": "diff name; same meaning",
94
+ "url": u.pickAWinner(URLS, 0)
95
+ },
96
+ userProps: {},
97
// Pass-through hook: both branches return `a` unchanged.
+ hook: (a, type) => {
98
+ if (type === "everything") {
99
+ return a;
100
+ }
101
+
102
+ return a;
103
+ }
104
+ };
105
+
106
+
107
// Dungeon "diff-meanings": a single "button click" event whose meaning is
// carried entirely by its "button name", "form name" and "cta" properties.
+ /** @type {Config} */
108
+ const sameNameDiffMeaning = {
109
+ ...commonOpts,
110
+ name: "diff-meanings",
111
+ seed: "bar",
112
+ description: "a dataset with a single event that means different things",
113
+ funnels: [],
114
+ events: [
115
+ {
116
+ event: "button click",
117
+ properties: {
118
+ "button name": ["start now", "register", "submit", "learn more", "try now", "free trial"],
119
// NOTE(review): "personal loan " and "credit card " carry trailing spaces -- confirm intentional.
+ "form name": u.pickAWinner(["mortgage", "student loan", "personal loan ", "credit card ", "savings account", "subscribe form"]),
120
+ "cta": ["free for 30d", "decision in 5min", "chat with a human", "try it in the app"],
121
+ }
122
+ }
123
+ ],
124
+ superProps: {
125
+ "theme": "same name; diff meaning",
126
+ "url": u.pickAWinner(URLS, 0)
127
+ },
128
+ userProps: {},
129
+ scdProps: {},
130
// Pass-through hook: both branches return `a` unchanged.
+ hook: (a, type) => {
131
+ if (type === "everything") {
132
+ return a;
133
+ }
134
+
135
+ return a;
136
+ }
137
+ };
138
+
139
// Dungeon "borrowing": two explicit funnels over five events. Overrides
// commonOpts with percentUsersBornInDataset: 100 and alsoInferFunnels: false.
+ /** @type {Config} */
140
+ const Borrowing = {
141
+ ...commonOpts,
142
+ percentUsersBornInDataset: 100,
143
+ alsoInferFunnels: false,
144
+ name: "borrowing",
145
+ seed: "baz",
146
+ description: "a dataset which requires borrowing",
147
+ funnels: [
148
// First funnel: page viewed -> click! -> sign up, flagged isFirstFunnel with sequential order.
+ {
149
+ "sequence": ["page viewed", "click!", "sign up"],
150
+ "isFirstFunnel": true,
151
+ "order": "sequential"
152
+ },
153
// Application funnel: "product" and "application Id" are declared here as
// funnel props, not on the two application events themselves.
+ {
154
+ "sequence": ["start application", "submit application"],
155
+ "props": {
156
// NOTE(review): "personal loan " and "credit card " carry trailing spaces -- confirm intentional.
+ "product": u.pickAWinner(["mortgage", "student loan", "personal loan ", "credit card ", "savings account", "investing account"]),
157
+ "application Id": () => { return v.uid(10); },
158
+ }
159
+ }
160
+ ],
161
+ events: [
162
+ {
163
+ event: "page viewed",
164
+ properties: {
165
+ "marketing channel": u.pickAWinner(["organic", "meta", "google", "x", "snapchat", "youtube", "instagram"], 0),
166
+ }
167
+ },
168
+ {
169
+ event: "click!",
170
+ properties: {
171
+ "CTA clicked": u.pickAWinner(["start now", "register", "submit", "learn more", "try now", "free trial"]),
172
+ }
173
+ },
174
+ {
175
+ event: "sign up",
176
+ properties: {
177
+ "A/B test": ["fast onboarding", "custom colors", "localization", "new design"],
178
+ "Variant": ["A", "B", "C", "Control"]
179
+ },
180
+ isFirstEvent: true
181
+ },
182
// The two application events declare no properties of their own.
+ {
183
+ event: "start application",
184
+
185
+ },
186
+ {
187
+ event: "submit application",
188
+ },
189
+ ],
190
+ superProps: {
191
+ "theme": "requires borrows",
192
+ "url": u.pickAWinner(URLS, 0)
193
+ },
194
+ userProps: {},
195
+ scdProps: {},
196
// Pass-through hook: both branches return `a` unchanged.
+ hook: (a, type) => {
197
+ if (type === "everything") {
198
+ return a;
199
+ }
200
+
201
+ return a;
202
+ }
203
+ };
204
+
205
+
206
// Dungeon named "bad shapes": one "nested nightmare" event whose `data`
// property is produced by buildAStupidNestedObject. Overrides commonOpts
// with percentUsersBornInDataset: 100 and alsoInferFunnels: false.
+ /** @type {Config} */
207
+ const Modeling = {
208
+ ...commonOpts,
209
+ percentUsersBornInDataset: 100,
210
+ alsoInferFunnels: false,
211
+ name: "bad shapes",
212
+ seed: "qux",
213
+ description: "a data set with bad modeling",
214
+ funnels: [
215
+
216
+ ],
217
+ events: [
218
+ {
219
+ event: "nested nightmare",
220
+ properties: {
221
// Function reference, not a call; the function declaration appears further down this file.
+ data: buildAStupidNestedObject
222
+ }
223
+ },
224
+ ],
225
+ superProps: {
226
+ "theme": "deeply nested",
227
+ "url": u.pickAWinner(URLS, 0)
228
+ },
229
+ userProps: {},
230
+ scdProps: {},
231
// Pass-through hook: both branches return `a` unchanged.
+ hook: (a, type) => {
232
+ if (type === "everything") {
233
+ return a;
234
+ }
235
+
236
+ return a;
237
+ }
238
+ };
239
+
240
+ const metaValues = u.pickAWinner(["foo", "bar", "baz", "qux", "quux", "corge", "grault", "garply", "waldo", "fred", "plugh", "xyzzy", "thud"]);
241
/**
 * Builds a randomly shaped, possibly nested object for the "nested nightmare" event.
 *
 * Each call draws between 1 and 5 keys. While the nesting level is at most 3,
 * a key has a 10% chance of holding another generated object (key written as
 * `<prefix>><suffix>`); otherwise it holds a value chosen from `metaValues`
 * (key written as `<prefix>|<suffix>`). Keys may collide, in which case the
 * later draw overwrites the earlier one.
 *
 * @param {number} [depth=0] current nesting level; recursive calls pass level + 1
 * @returns {Object} the generated object
 */
function buildAStupidNestedObject(depth = 0) {
	// This function is handed over as a property-value callback, so the caller
	// decides what (if anything) is passed in; treat a non-integer as the top level.
	const level = Number.isInteger(depth) ? depth : 0;
	const obj = {};
	const keyPrefix = ["A", "B", "C", "D", "E", "F", "G", "H", "I"];
	const keySuffix = ["1", "2", "3", "4", "5", "6", "7", "8", "9"];

	const numKeys = chance.integer({ min: 1, max: 5 });
	for (let i = 0; i < numKeys; i++) {
		if (level <= 3 && chance.bool({ likelihood: 10 })) {
			const key = `${chance.pickone(keyPrefix)}>${chance.pickone(keySuffix)}`;
			// Pass the incremented level down; without it every level ran at 0
			// and the depth limit above never applied.
			obj[key] = buildAStupidNestedObject(level + 1);
		}
		else {
			const key = `${chance.pickone(keyPrefix)}|${chance.pickone(keySuffix)}`;
			obj[key] = u.choose(metaValues);
		}
	}
	return obj;
}
260
+
261
+
262
// The configs handed to run() by runDungeons, in this order.
+ const dungeons = [
263
+ diffNameSameMeaning,
264
+ sameNameDiffMeaning,
265
+ Borrowing,
266
+ Modeling
267
+ ];
268
+
269
/**
 * Runs each entry of `dungeons` through `run`, one at a time and in list
 * order, waiting for each job to finish before starting the next.
 *
 * @returns {Promise<Array>} the value resolved by `run` for each dungeon, in order
 */
async function runDungeons() {
	const finished = [];
	for (let idx = 0; idx < dungeons.length; idx += 1) {
		finished.push(await run(dungeons[idx]));
	}
	return finished;
}
277
+
278
// Entry point: start the run as soon as this file is loaded.
// The leftover `debugger` statements are removed; the success handler now
// reports how many jobs completed, and a failure is logged and reflected in
// the process exit code instead of exiting as if nothing went wrong.
runDungeons()
	.then((results) => {
		console.log(`completed ${results.length} dungeon run(s)`);
	})
	.catch((e) => {
		console.error(e);
		process.exitCode = 1;
	});
287
+
288
+ module.exports = {};
package/index.js CHANGED
@@ -23,7 +23,7 @@ global.FIXED_NOW = FIXED_NOW;
23
23
  // ^ this creates a FIXED POINT in time; we will shift it later
24
24
  let FIXED_BEGIN = dayjs.unix(FIXED_NOW).subtract(90, 'd').unix();
25
25
  global.FIXED_BEGIN = FIXED_BEGIN;
26
- const actualNow = dayjs();
26
+ const actualNow = dayjs().add(2, "day");
27
27
  const timeShift = actualNow.diff(dayjs.unix(FIXED_NOW), "seconds");
28
28
  const daysShift = actualNow.diff(dayjs.unix(FIXED_NOW), "days");
29
29
 
@@ -104,6 +104,16 @@ async function main(config) {
104
104
  campaigns: u.pickAWinner(campaigns, 0),
105
105
  };
106
106
 
107
+ if (config.singleCountry) {
108
+ DEFAULTS.locationsEvents = u.pickAWinner(clone(locations)
109
+ .filter(l => l.country === config.singleCountry)
110
+ .map(l => { delete l.country; return l; }), 0);
111
+
112
+ DEFAULTS.locationsUsers = u.pickAWinner(clone(locations)
113
+ .filter(l => l.country === config.singleCountry)
114
+ .map(l => { delete l.country_code; return l; }), 0);
115
+ }
116
+
107
117
 
108
118
  //TRACKING
109
119
  const runId = uid(42);
@@ -124,8 +134,8 @@ async function main(config) {
124
134
  const scdTableKeys = Object.keys(scdProps);
125
135
  const scdTableData = await Promise.all(scdTableKeys.map(async (key) =>
126
136
  //todo don't assume everything is a string... lol
127
- // @ts-ignore
128
- await makeHookArray([], { hook, type: "scd", config, format, scdKey: key, entityType: config.scdProps[key].type, dataType: "string", filepath: `${simulationName}-${scdProps[key]?.type || "user"}-SCD-${key}` })
137
+ // @ts-ignore
138
+ await makeHookArray([], { hook, type: "scd", config, format, scdKey: key, entityType: config.scdProps[key]?.type || "user", dataType: "string", filepath: `${simulationName}-${scdProps[key]?.type || "user"}-SCD-${key}` })
129
139
  ));
130
140
  const groupTableKeys = Object.keys(groupKeys);
131
141
  const groupProfilesData = await Promise.all(groupTableKeys.map(async (key, index) => {
@@ -431,16 +441,20 @@ async function makeEvent(distinct_id, earliestTime, chosenEvent, anonymousIds, s
431
441
  let defaultProps = {};
432
442
  let devicePool = [];
433
443
 
434
- if (hasLocation) defaultProps.location = DEFAULTS.locationsEvents();
435
- if (hasBrowser) defaultProps.browser = DEFAULTS.browsers();
444
+ if (hasLocation) defaultProps.location = u.shuffleArray(DEFAULTS.locationsEvents()).pop();
445
+ if (hasBrowser) defaultProps.browser = u.choose(DEFAULTS.browsers());
436
446
  if (hasAndroidDevices) devicePool.push(DEFAULTS.androidDevices());
437
447
  if (hasIOSDevices) devicePool.push(DEFAULTS.iOSDevices());
438
448
  if (hasDesktopDevices) devicePool.push(DEFAULTS.desktopDevices());
439
449
 
440
450
  // we don't always have campaigns, because of attribution
441
- if (hasCampaigns && chance.bool({ likelihood: 25 })) defaultProps.campaigns = DEFAULTS.campaigns();
451
+ if (hasCampaigns && chance.bool({ likelihood: 25 })) defaultProps.campaigns = u.shuffleArray(DEFAULTS.campaigns()).pop();
442
452
  const devices = devicePool.flat();
443
- if (devices.length) defaultProps.device = devices;
453
+ if (devices.length) defaultProps.device = u.shuffleArray(devices).pop();
454
+
455
+
456
+
457
+
444
458
 
445
459
 
446
460
  //event time
@@ -505,6 +519,33 @@ async function makeEvent(distinct_id, earliestTime, chosenEvent, anonymousIds, s
505
519
  }
506
520
  }
507
521
  }
522
+ else if (typeof (defaultProps[key]) === "object") {
523
+ const obj = defaultProps[key];
524
+ for (const subKey in obj) {
525
+ if (Array.isArray(obj[subKey])) {
526
+ const subChoice = u.choose(obj[subKey]);
527
+ if (Array.isArray(subChoice)) {
528
+ for (const subSubChoice of subChoice) {
529
+ if (!eventTemplate[subKey]) eventTemplate[subKey] = subSubChoice;
530
+ }
531
+ }
532
+ else if (typeof subChoice === "object") {
533
+ for (const subSubKey in subChoice) {
534
+ if (!eventTemplate[subSubKey]) eventTemplate[subSubKey] = subChoice[subSubKey];
535
+ }
536
+ }
537
+ else {
538
+ if (!eventTemplate[subKey]) eventTemplate[subKey] = subChoice;
539
+ }
540
+ }
541
+ else {
542
+ if (!eventTemplate[subKey]) eventTemplate[subKey] = obj[subKey];
543
+ }
544
+ }
545
+ }
546
+ else {
547
+ if (!eventTemplate[key]) eventTemplate[key] = defaultProps[key];
548
+ }
508
549
  }
509
550
  }
510
551
 
@@ -725,12 +766,14 @@ async function makeFunnel(funnel, user, firstEventTime, profile, scd, config) {
725
766
  */
726
767
  async function makeProfile(props, defaults) {
727
768
  operations++;
769
+ const keysToNotChoose = ["anonymousIds", "sessionIds"];
728
770
 
729
771
  const profile = {
730
772
  ...defaults,
731
773
  };
732
774
 
733
775
  for (const key in profile) {
776
+ if (keysToNotChoose.includes(key)) continue;
734
777
  try {
735
778
  profile[key] = u.choose(profile[key]);
736
779
  }
@@ -763,7 +806,7 @@ async function makeProfile(props, defaults) {
763
806
  */
764
807
  async function makeSCD(scdProp, scdKey, distinct_id, mutations, created) {
765
808
  if (Array.isArray(scdProp)) scdProp = { values: scdProp, frequency: 'week', max: 10, timing: 'fuzzy', type: 'user' };
766
- const { frequency, max, timing, values, type } = scdProp;
809
+ const { frequency, max, timing, values, type = "user" } = scdProp;
767
810
  if (JSON.stringify(values) === "{}" || JSON.stringify(values) === "[]") return [];
768
811
  const scdEntries = [];
769
812
  let lastInserted = dayjs(created);
@@ -999,7 +1042,7 @@ async function userLoop(config, storage, concurrency = 1) {
999
1042
 
1000
1043
 
1001
1044
  // SCD creation
1002
- const scdUserTables = t.objFilter(scdProps, (scd) => scd.type === 'user');
1045
+ const scdUserTables = t.objFilter(scdProps, (scd) => scd.type === 'user' || !scd.type);
1003
1046
  const scdTableKeys = Object.keys(scdUserTables);
1004
1047
 
1005
1048
 
@@ -1123,7 +1166,8 @@ async function sendToMixpanel(config, storage) {
1123
1166
  fixData: true,
1124
1167
  verbose: false,
1125
1168
  forceStream: true,
1126
- strict: false,
1169
+ strict: true, //false,
1170
+ epochEnd: dayjs().unix(), //is this chill?
1127
1171
  dryRun: false,
1128
1172
  abridged: false,
1129
1173
  fixJson: true,
@@ -1338,6 +1382,11 @@ function validateDungeonConfig(config) {
1338
1382
  config.simulationName = name || makeName();
1339
1383
  config.name = config.simulationName;
1340
1384
 
1385
+ //events
1386
+ if (!events || !events.length) events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }];
1387
+ // @ts-ignore
1388
+ if (typeof events[0] === "string") events = events.map(e => ({ event: e }));
1389
+
1341
1390
  //max batch size
1342
1391
  if (batchSize > 0) BATCH_SIZE = batchSize;
1343
1392
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "make-mp-data",
3
- "version": "1.5.052",
3
+ "version": "1.5.054",
4
4
  "description": "builds all mixpanel primitives for a given project",
5
5
  "main": "index.js",
6
6
  "types": "types.d.ts",
package/scratch.mjs CHANGED
@@ -13,17 +13,17 @@ TO DOs
13
13
 
14
14
  import main from "./index.js";
15
15
 
16
- import simple from './schemas/simple.js';
17
- import funnels from './schemas/funnels.js';
18
- import foobar from './schemas/foobar.js';
19
- import complex from './schemas/complex.js';
20
- import adspend from './schemas/adspend.js'
16
+ import simple from './dungeons/simple.js';
17
+ import funnels from './dungeons/funnels.js';
18
+ import foobar from './dungeons/foobar.js';
19
+ import complex from './dungeons/complex.js';
20
+ import adspend from './dungeons/adspend.js'
21
21
 
22
- import anon from './schemas/anon.js';
22
+ import anon from './dungeons/anon.js';
23
23
  import execSync from 'child_process';
24
- import mirror from './schemas/mirror.js'
24
+ import mirror from './dungeons/mirror.js'
25
25
  // import mds from './dungeons/modern-data-stack.js'
26
- import big from './schemas/big.js'
26
+ import big from './dungeons/big.js'
27
27
 
28
28
  const numEvents = 1000;
29
29
 
@@ -33,6 +33,8 @@ const spec = {
33
33
  writeToDisk: true,
34
34
  verbose: true,
35
35
  makeChart: false,
36
+ hasAnonIds: true,
37
+ hasSessionIds: true
36
38
  // format: "csv",
37
39
  // numEvents,
38
40
  // numUsers: numEvents / 100,
package/types.d.ts CHANGED
@@ -50,6 +50,7 @@ declare namespace main {
50
50
  hasSessionIds?: boolean;
51
51
  alsoInferFunnels?: boolean;
52
52
  makeChart?: boolean | string;
53
+ singleCountry?: string;
53
54
 
54
55
  //models
55
56
  events?: EventConfig[]; //| string[]; //can also be a array of strings
@@ -73,7 +74,7 @@ declare namespace main {
73
74
  }
74
75
 
75
76
  export type complexSCDProp = {
76
- type: string;
77
+ type?: string | "user_id" | "company_id";
77
78
  frequency: "day" | "week" | "month" | "year";
78
79
  values: ValueValid;
79
80
  timing: "fixed" | "fuzzy";