make-mp-data 1.5.54 → 1.5.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,7 @@
15
15
  "request": "launch",
16
16
  "name": "run dungeon",
17
17
  "runtimeExecutable": "nodemon",
18
- "runtimeArgs": ["--inspect"],
18
+ "runtimeArgs": ["--inspect", "--ignore", "./data"],
19
19
  "program": "${workspaceFolder}/index.js",
20
20
  "args": ["--ignore", "./data/*", "${file}"],
21
21
  "restart": true,
@@ -23,6 +23,7 @@
23
23
  "internalConsoleOptions": "neverOpen",
24
24
  "skipFiles": ["<node_internals>/**"],
25
25
  "preLaunchTask": "npm: prune",
26
+
26
27
  },
27
28
  {
28
29
  "type": "node",
@@ -35,7 +36,8 @@
35
36
  "internalConsoleOptions": "neverOpen",
36
37
  "env": {
37
38
  "NODE_ENV": "dev"
38
- }
39
+ },
40
+ "runtimeArgs": ["--ignore", "./data"],
39
41
  },
40
42
  {
41
43
  "command": "npm run func:local",
package/components/ai.js CHANGED
@@ -83,21 +83,35 @@ function validator(schema) {
83
83
 
84
84
 
85
85
  if (require.main === module) {
86
- generateSchema(` a payments platform called popPay PopPay platform, enables facial recognition-based transactions for contact-free payments, entry, and verification. Users sign up with a selfie, and their image becomes a secure digital key stored in PopID’s cloud, allowing for streamlined interactions at participating businesses. This system aims to replace IDs, passwords, and loyalty cards, focusing on security and user control.
86
+ generateSchema(`https://apps.apple.com/us/app/call-guardian-for-us-cellular/id1228680023 call guardian is an app for blocking spam calls made by TNS
87
87
 
88
- Customers choose
89
- to be recognized
90
- Guests opt-in by standing directly in front of camera or notifying cashier. PopID never identifies guests without their permission.
91
-
92
- Personalized ordering experience
93
- Personalized
94
- ordering experience
95
- Guests can be automatically signed in to loyalty programs, shown past orders, and offered customized recommendations.
96
-
97
- Contact-free payment
98
- Contact-free
99
- payment
100
- PopPay’s eWallet links to guest’s payment card or checking account and offers lower processing fees than credit and debit cards.`)
88
+ this is the list of events we want:
89
+
90
+ Onboarding Started
91
+ Onboarding Completed (Basic)
92
+ Onboarding Completed (Premium)
93
+ Page Views (all "screens" within the app")
94
+ Enable/Disable High Risk Blocking
95
+ Enable/Disable Medium Risk Blocking
96
+ Enable/Disable Neighborhood Spoof Blocking
97
+ Call Blocked (Spam)
98
+ Call Blocked (Custom List)
99
+ Branded Call w/o Logo Received
100
+ Branded Call w/ Logo Received
101
+ Branded Call Answered
102
+ Branded Call Blocked
103
+ Enable/Disable Text Spam
104
+ Reverse Number Lookup
105
+ Report as Spam
106
+ Report as Not Spam
107
+ Custom Block List Number Add
108
+ Custom Block List Number Remove
109
+ Call Arrives Before Push
110
+ Error Scenarios
111
+ User Can't Authenticate
112
+ Xfinity Services Can't Connect
113
+ Verizon Services Can't Connect
114
+ Deep Links into app`)
101
115
  .then((result) => {
102
116
  if (NODE_ENV === "dev") debugger;
103
117
  })
@@ -6,7 +6,7 @@ const { spawn } = require('child_process');
6
6
  const dayjs = require('dayjs');
7
7
  const utc = require('dayjs/plugin/utc');
8
8
  const path = require('path');
9
- const { mkdir } = require('ak-tools');
9
+ const { mkdir, parseGCSUri } = require('ak-tools');
10
10
  const { existsSync } = require('fs');
11
11
  dayjs.extend(utc);
12
12
  require('dotenv').config();
@@ -26,6 +26,11 @@ let chanceInitialized = false;
26
26
  const ACTUAL_NOW = dayjs.utc();
27
27
 
28
28
 
29
+ const { Storage: cloudStorage } = require('@google-cloud/storage');
30
+ const projectId = 'YOUR_PROJECT_ID';
31
+ const storage = new cloudStorage({ projectId });
32
+
33
+
29
34
  /*
30
35
  ----
31
36
  RNG
@@ -184,10 +189,10 @@ function choose(value) {
184
189
  try {
185
190
  // Keep resolving the value if it's a function
186
191
  while (typeof value === 'function') {
187
- value = value();
192
+ value = value();
188
193
  }
189
194
 
190
-
195
+
191
196
  // [[],[],[]] should pick one
192
197
  if (Array.isArray(value) && Array.isArray(value[0])) {
193
198
  return chance.pickone(value);
@@ -434,23 +439,38 @@ STREAMERS
434
439
  ----
435
440
  */
436
441
 
437
- function streamJSON(path, data) {
442
+ function streamJSON(filePath, data) {
438
443
  return new Promise((resolve, reject) => {
439
- const writeStream = fs.createWriteStream(path, { encoding: 'utf8' });
444
+ let writeStream;
445
+ if (filePath?.startsWith('gs://')) {
446
+ const { uri, bucket, file } = parseGCSUri(filePath);
447
+ writeStream = storage.bucket(bucket).file(file).createWriteStream({ gzip: true });
448
+ }
449
+ else {
450
+ writeStream = fs.createWriteStream(filePath, { encoding: 'utf8' });
451
+ }
440
452
  data.forEach(item => {
441
453
  writeStream.write(JSON.stringify(item) + '\n');
442
454
  });
443
455
  writeStream.end();
444
456
  writeStream.on('finish', () => {
445
- resolve(path);
457
+ resolve(filePath);
446
458
  });
447
459
  writeStream.on('error', reject);
448
460
  });
449
461
  }
450
462
 
451
- function streamCSV(path, data) {
463
+ function streamCSV(filePath, data) {
452
464
  return new Promise((resolve, reject) => {
453
- const writeStream = fs.createWriteStream(path, { encoding: 'utf8' });
465
+ let writeStream;
466
+ if (filePath?.startsWith('gs://')) {
467
+ const { uri, bucket, file } = parseGCSUri(filePath);
468
+ writeStream = storage.bucket(bucket).file(file).createWriteStream({ gzip: true });
469
+ }
470
+ else {
471
+ writeStream = fs.createWriteStream(filePath, { encoding: 'utf8' });
472
+ }
473
+
454
474
  // Extract all unique keys from the data array
455
475
  const columns = getUniqueKeys(data); // Assuming getUniqueKeys properly retrieves all keys
456
476
 
@@ -469,7 +489,7 @@ function streamCSV(path, data) {
469
489
 
470
490
  writeStream.end();
471
491
  writeStream.on('finish', () => {
472
- resolve(path);
492
+ resolve(filePath);
473
493
  });
474
494
  writeStream.on('error', reject);
475
495
  });
package/dungeons/big.js CHANGED
@@ -1,15 +1,4 @@
1
- /**
2
- * This is the default configuration file for the data generator in SIMPLE mode
3
- * notice how the config object is structured, and see it's type definition in ./types.d.ts
4
- * feel free to modify this file to customize the data you generate
5
- * see helper functions in utils.js for more ways to generate data
6
- */
7
-
8
-
9
- /* cSpell:disable */
10
-
11
-
12
-
1
+ const seed = "lets go big";
13
2
  const Chance = require('chance');
14
3
  const chance = new Chance();
15
4
  const dayjs = require("dayjs");
@@ -18,17 +7,19 @@ dayjs.extend(utc);
18
7
  const { uid, comma, makeName } = require('ak-tools');
19
8
  const { pickAWinner, weighNumRange, integer, date, choose } = require('../components/utils');
20
9
 
10
+
21
11
  const eventsPerQuarter = 5_000_000_000 // ~5 billion
22
- const numQuarters = 8;
23
- const totalEvents = eventsPerQuarter * numQuarters;
12
+ const numQuarters = 8; // 24 months
13
+ const parallelism = 5000;
14
+ const totalEvents = Math.floor((eventsPerQuarter * numQuarters) / parallelism);
24
15
  const eventPerUser = 500;
25
- const totalUsers = totalEvents / eventPerUser;
16
+ const totalUsers = Math.floor(totalEvents / eventPerUser);
26
17
  const totalDays = (numQuarters * 90) + 10;
27
18
 
28
19
  /** @type {import('../types').Dungeon} */
29
20
  const config = {
30
- token: "64b48bc361f4477634bdfafb78ef39ad",
31
- seed: "lets go big",
21
+ token: "",
22
+ seed: seed,
32
23
  numDays: totalDays,
33
24
  numEvents: totalEvents,
34
25
  numUsers: totalUsers,
@@ -39,14 +30,14 @@ const config = {
39
30
  hasLocation: true,
40
31
  hasAndroidDevices: false,
41
32
  alsoInferFunnels: false,
42
- batchSize: 5_000_000,
43
- hasAvatar: true,
33
+ batchSize: 2_000_000,
34
+ hasAvatar: false,
44
35
  hasAdSpend: false,
45
36
  hasBrowser: false,
46
37
  hasCampaigns: false,
47
38
  hasDesktopDevices: false,
48
39
  hasIOSDevices: false,
49
- writeToDisk: false,
40
+ writeToDisk: "gs://dungeon_master_4/big_data",
50
41
  funnels: [
51
42
  {
52
43
  "sequence": ["foo", "bar", "baz", "qux", "garply", "durtle", "linny", "fonk", "crumn", "yak"],
@@ -118,8 +118,7 @@ const config = {
118
118
  }
119
119
  }
120
120
  ],
121
- superProps: {
122
- platform: ["web", "mobile", "web", "mobile", "web", "web", "kiosk", "smartTV"],
121
+ superProps: {
123
122
  currentTheme: weighChoices(["light", "dark", "custom", "light", "dark"]),
124
123
  },
125
124
  /*
package/index.js CHANGED
@@ -45,6 +45,22 @@ const t = require('ak-tools');
45
45
 
46
46
  //CLOUD
47
47
  const functions = require('@google-cloud/functions-framework');
48
+ const { GoogleAuth } = require('google-auth-library');
49
+ const CONCURRENCY = 1_000;
50
+ let RUNTIME_URL = "https://dm4-lmozz6xkha-uc.a.run.app"; // IMPORTANT: this is what allows the service to call itself
51
+ // const functionName = process.env.FUNCTION_NAME || process.env.K_SERVICE;
52
+
53
+ // const region = process.env.REGION; // Optionally, you can get the region too
54
+ // const GCP_PROJECT = process.env.GCLOUD_PROJECT; // Project ID is also available as an environment variable
55
+ // const isCloudFunction = !!process.env.FUNCTION_NAME || !!process.env.FUNCTION_TARGET;
56
+ // if (isCloudFunction) {
57
+ // RUNTIME_URL = `https://${region}-${GCP_PROJECT}.cloudfunctions.net/${functionName}`;
58
+ // }
59
+ // else {
60
+ // RUNTIME_URL = `http://localhost:8080`;
61
+ // }
62
+
63
+
48
64
 
49
65
  // DEFAULTS
50
66
  const { campaigns, devices, locations } = require('./components/defaults.js');
@@ -74,7 +90,6 @@ let eventCount = 0;
74
90
  let userCount = 0;
75
91
 
76
92
 
77
-
78
93
  /**
79
94
  * generates fake mixpanel data
80
95
  * @param {Config} config
@@ -336,60 +351,156 @@ async function main(config) {
336
351
  ...STORAGE,
337
352
  importResults,
338
353
  files,
354
+ operations,
355
+ eventCount,
356
+ userCount,
339
357
  time: { start, end, delta, human },
340
358
  };
341
359
  }
342
360
 
343
-
344
-
345
361
  functions.http('entry', async (req, res) => {
346
362
  const reqTimer = timer('request');
347
363
  reqTimer.start();
348
364
  let response = {};
349
365
  let script = req.body || "";
350
- let writePath;
366
+ const params = { replicate: 1, is_replica: "false", runId: "", seed: "", ...req.query };
367
+ const replicate = Number(params.replicate);
368
+ // @ts-ignore
369
+ if (params?.is_replica === "true") params.is_replica = true;
370
+ // @ts-ignore
371
+ else params.is_replica = false;
372
+ const isReplica = params.is_replica;
373
+ isBATCH_MODE = true;
374
+ if (!params.runId) params.runId = uid(42);
351
375
  try {
352
- sLog("DM4: start");
353
376
  if (!script) throw new Error("no script");
354
377
 
355
378
  // Replace require("../ with require("./
356
- script = script.replace(/require\("\.\.\//g, 'require("./');
379
+ // script = script.replace(/require\("\.\.\//g, 'require("./');
357
380
  // ^ need to replace this because of the way the script is passed in... this is sketch
358
381
 
359
382
  /** @type {Config} */
360
383
  const config = eval(script);
361
- sLog("DM4: eval ok");
362
-
363
- const { token } = config;
364
- if (!token) throw new Error("no token");
384
+ if (isReplica) {
385
+ const newSeed = (Math.random() / Math.random() / Math.random() / Math.random() / Math.random() / Math.random()).toString();
386
+ config.seed = newSeed;
387
+ params.seed = newSeed;
388
+ }
365
389
 
366
390
  /** @type {Config} */
367
391
  const optionsYouCantChange = {
368
- verbose: false,
369
- writeToDisk: false,
370
-
392
+ verbose: false
371
393
  };
372
- const result = await main({
373
- ...config,
374
- ...optionsYouCantChange,
375
- });
376
- await rm(writePath);
377
- reqTimer.stop(false);
378
- const { start, end, delta, human } = jobTimer.report(false);
379
- sLog(`DM4: end (${human})`, { ms: delta });
394
+
395
+ if (replicate <= 1 || isReplica) {
396
+ if (isReplica) sLog("DM4: worker start", params);
397
+ // @ts-ignore
398
+ const { files = [], operations = 0, eventCount = 0, userCount = 0 } = await main({
399
+ ...config,
400
+ ...optionsYouCantChange,
401
+ });
402
+ reqTimer.stop(false);
403
+ response = { files, operations, eventCount, userCount };
404
+ }
405
+
406
+ else {
407
+ sLog(`DM4: job start (${replicate} workers)`, params);
408
+ const results = await spawn_file_workers(replicate, script, params);
409
+ response = results;
410
+ }
380
411
  }
381
412
  catch (e) {
382
- sLog("DM4: error", { error: e.message });
413
+ sLog("DM4: error", { error: e.message, stack: e.stack }, "ERROR");
383
414
  response = { error: e.message };
384
415
  res.status(500);
385
- await rm(writePath);
386
416
  }
417
+
387
418
  finally {
419
+ reqTimer.stop(false);
420
+ const { start, end, delta, human } = reqTimer.report(false);
421
+ if (!isReplica) {
422
+ sLog(`DM4: job end (${human})`, { human, delta, ...params, ...response });
423
+ }
424
+ if (isReplica) {
425
+ const eps = Math.floor(((response?.eventCount || 0) / delta) * 1000);
426
+ sLog(`DM4: worker end (${human})`, { human, delta, eps, ...params, ...response });
427
+ }
428
+ response = { ...response, start, end, delta, human, ...params };
388
429
  res.send(response);
430
+ return;
389
431
  }
390
432
  });
391
433
 
392
434
 
435
+ /**
436
+ * @typedef {import('mixpanel-import').ImportResults} ImportResults
437
+ */
438
+ async function spawn_file_workers(numberWorkers, payload, params) {
439
+ const auth = new GoogleAuth();
440
+ let client;
441
+ if (RUNTIME_URL.includes('localhost')) {
442
+ client = await auth.getClient();
443
+ }
444
+ else {
445
+ client = await auth.getIdTokenClient(RUNTIME_URL);
446
+ }
447
+ const limit = pLimit(CONCURRENCY);
448
+ const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
449
+
450
+ const requestPromises = Array.from({ length: numberWorkers }, async (_, index) => {
451
+ index = index + 1;
452
+ await delay(index * 108);
453
+ return limit(() => build_request(client, payload, index, params, numberWorkers));
454
+ });
455
+ const complete = await Promise.allSettled(requestPromises);
456
+ const results = {
457
+ jobs_success: complete.filter((p) => p.status === "fulfilled").length,
458
+ jobs_fail: complete.filter((p) => p.status === "rejected").length
459
+ };
460
+
461
+ return results;
462
+ }
463
+
464
+
465
+ async function build_request(client, payload, index, params, total) {
466
+ let retryAttempt = 0;
467
+ sLog(`DM4: summoning worker #${index} of ${total}`, params);
468
+ try {
469
+ const req = await client.request({
470
+ url: RUNTIME_URL + `?replicate=1&is_replica=true&runId=${params.runId || "no run id"}`,
471
+ method: "POST",
472
+ data: payload,
473
+ headers: {
474
+ "Content-Type": "text/plain",
475
+ },
476
+ timeout: 3600 * 1000 * 10,
477
+ retryConfig: {
478
+ retry: 3,
479
+ onRetryAttempt: (error) => {
480
+ const statusCode = error?.response?.status?.toString() || "";
481
+ retryAttempt++;
482
+ sLog(`DM4: summon worker ${index} retry #${retryAttempt}`, { statusCode, message: error.message, stack: error.stack, ...params }, "DEBUG");
483
+ },
484
+ retryDelay: 1000,
485
+ shouldRetry: (error) => {
486
+ if (error.code === 'ECONNRESET') return true;
487
+ const statusCode = error?.response?.status;
488
+ if (statusCode >= 500) return true;
489
+ if (statusCode === 429) return true;
490
+ }
491
+ },
492
+ });
493
+ sLog(`DM4: worker #${index} responded`, params);
494
+ const { data } = req;
495
+ return data;
496
+ } catch (error) {
497
+ sLog(`DM4: worker #${index} failed to respond`, { message: error.message, stack: error.stack, code: error.code, retries: retryAttempt, ...params }, "ERROR");
498
+ return {};
499
+ }
500
+ }
501
+
502
+
503
+
393
504
  /*
394
505
  ------
395
506
  MODELS
@@ -1393,9 +1504,9 @@ function validateDungeonConfig(config) {
1393
1504
  // funnels
1394
1505
 
1395
1506
  // FUNNEL INFERENCE
1396
- if (!funnels || !funnels.length) {
1397
- funnels = inferFunnels(events);
1398
- }
1507
+ // if (!funnels || !funnels.length) {
1508
+ // funnels = inferFunnels(events);
1509
+ // }
1399
1510
 
1400
1511
  if (alsoInferFunnels) {
1401
1512
  const inferredFunnels = inferFunnels(events);
@@ -1502,13 +1613,21 @@ async function makeHookArray(arr = [], opts = {}) {
1502
1613
  if (existsSync(dataFolder)) writeDir = dataFolder;
1503
1614
  else writeDir = path.resolve("./");
1504
1615
 
1505
- if (NODE_ENV === "prod") writeDir = path.resolve(os.tmpdir());
1616
+ // ! decide where to write the files in prod
1617
+ if (NODE_ENV === "prod") {
1618
+ writeDir = path.resolve(os.tmpdir());
1619
+ }
1620
+ if (typeof rest?.config?.writeToDisk === "string" && rest?.config?.writeToDisk?.startsWith('gs://')) {
1621
+ writeDir = rest.config.writeToDisk;
1622
+ }
1506
1623
 
1507
1624
  function getWritePath() {
1508
1625
  if (isBATCH_MODE) {
1626
+ if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}-part-${batch.toString()}.${format}`;
1509
1627
  return path.join(writeDir, `${filepath}-part-${batch.toString()}.${format}`);
1510
1628
  }
1511
1629
  else {
1630
+ if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}.${format}`;
1512
1631
  return path.join(writeDir, `${filepath}.${format}`);
1513
1632
  }
1514
1633
  }
@@ -1570,6 +1689,7 @@ async function makeHookArray(arr = [], opts = {}) {
1570
1689
  }
1571
1690
  if (isBATCH_MODE) data.length = 0;
1572
1691
  return writeResult;
1692
+
1573
1693
  }
1574
1694
 
1575
1695
  async function flush() {