make-mp-data 1.5.53 → 1.5.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -23,7 +23,7 @@ global.FIXED_NOW = FIXED_NOW;
23
23
  // ^ this creates a FIXED POINT in time; we will shift it later
24
24
  let FIXED_BEGIN = dayjs.unix(FIXED_NOW).subtract(90, 'd').unix();
25
25
  global.FIXED_BEGIN = FIXED_BEGIN;
26
- const actualNow = dayjs();
26
+ const actualNow = dayjs().add(2, "day");
27
27
  const timeShift = actualNow.diff(dayjs.unix(FIXED_NOW), "seconds");
28
28
  const daysShift = actualNow.diff(dayjs.unix(FIXED_NOW), "days");
29
29
 
@@ -45,6 +45,22 @@ const t = require('ak-tools');
45
45
 
46
46
  //CLOUD
47
47
  const functions = require('@google-cloud/functions-framework');
48
+ const { GoogleAuth } = require('google-auth-library');
49
+ const CONCURRENCY = 1_000;
50
+ let RUNTIME_URL = "https://dm4-lmozz6xkha-uc.a.run.app"; // IMPORTANT: this is what allows the service to call itself
51
+ // const functionName = process.env.FUNCTION_NAME || process.env.K_SERVICE;
52
+
53
+ // const region = process.env.REGION; // Optionally, you can get the region too
54
+ // const GCP_PROJECT = process.env.GCLOUD_PROJECT; // Project ID is also available as an environment variable
55
+ // const isCloudFunction = !!process.env.FUNCTION_NAME || !!process.env.FUNCTION_TARGET;
56
+ // if (isCloudFunction) {
57
+ // RUNTIME_URL = `https://${region}-${GCP_PROJECT}.cloudfunctions.net/${functionName}`;
58
+ // }
59
+ // else {
60
+ // RUNTIME_URL = `http://localhost:8080`;
61
+ // }
62
+
63
+
48
64
 
49
65
  // DEFAULTS
50
66
  const { campaigns, devices, locations } = require('./components/defaults.js');
@@ -74,7 +90,6 @@ let eventCount = 0;
74
90
  let userCount = 0;
75
91
 
76
92
 
77
-
78
93
  /**
79
94
  * generates fake mixpanel data
80
95
  * @param {Config} config
@@ -104,6 +119,16 @@ async function main(config) {
104
119
  campaigns: u.pickAWinner(campaigns, 0),
105
120
  };
106
121
 
122
+ if (config.singleCountry) {
123
+ DEFAULTS.locationsEvents = u.pickAWinner(clone(locations)
124
+ .filter(l => l.country === config.singleCountry)
125
+ .map(l => { delete l.country; return l; }), 0);
126
+
127
+ DEFAULTS.locationsUsers = u.pickAWinner(clone(locations)
128
+ .filter(l => l.country === config.singleCountry)
129
+ .map(l => { delete l.country_code; return l; }), 0);
130
+ }
131
+
107
132
 
108
133
  //TRACKING
109
134
  const runId = uid(42);
@@ -326,60 +351,156 @@ async function main(config) {
326
351
  ...STORAGE,
327
352
  importResults,
328
353
  files,
354
+ operations,
355
+ eventCount,
356
+ userCount,
329
357
  time: { start, end, delta, human },
330
358
  };
331
359
  }
332
360
 
333
-
334
-
335
361
  functions.http('entry', async (req, res) => {
336
362
  const reqTimer = timer('request');
337
363
  reqTimer.start();
338
364
  let response = {};
339
365
  let script = req.body || "";
340
- let writePath;
366
+ const params = { replicate: 1, is_replica: "false", runId: "", seed: "", ...req.query };
367
+ const replicate = Number(params.replicate);
368
+ // @ts-ignore
369
+ if (params?.is_replica === "true") params.is_replica = true;
370
+ // @ts-ignore
371
+ else params.is_replica = false;
372
+ const isReplica = params.is_replica;
373
+ isBATCH_MODE = true;
374
+ if (!params.runId) params.runId = uid(42);
341
375
  try {
342
- sLog("DM4: start");
343
376
  if (!script) throw new Error("no script");
344
377
 
345
378
  // Replace require("../ with require("./
346
- script = script.replace(/require\("\.\.\//g, 'require("./');
379
+ // script = script.replace(/require\("\.\.\//g, 'require("./');
347
380
  // ^ need to replace this because of the way the script is passed in... this is sketch
348
381
 
349
382
  /** @type {Config} */
350
383
  const config = eval(script);
351
- sLog("DM4: eval ok");
352
-
353
- const { token } = config;
354
- if (!token) throw new Error("no token");
384
+ if (isReplica) {
385
+ const newSeed = (Math.random() / Math.random() / Math.random() / Math.random() / Math.random() / Math.random()).toString();
386
+ config.seed = newSeed;
387
+ params.seed = newSeed;
388
+ }
355
389
 
356
390
  /** @type {Config} */
357
391
  const optionsYouCantChange = {
358
- verbose: false,
359
- writeToDisk: false,
360
-
392
+ verbose: false
361
393
  };
362
- const result = await main({
363
- ...config,
364
- ...optionsYouCantChange,
365
- });
366
- await rm(writePath);
367
- reqTimer.stop(false);
368
- const { start, end, delta, human } = jobTimer.report(false);
369
- sLog(`DM4: end (${human})`, { ms: delta });
394
+
395
+ if (replicate <= 1 || isReplica) {
396
+ if (isReplica) sLog("DM4: worker start", params);
397
+ // @ts-ignore
398
+ const { files = [], operations = 0, eventCount = 0, userCount = 0 } = await main({
399
+ ...config,
400
+ ...optionsYouCantChange,
401
+ });
402
+ reqTimer.stop(false);
403
+ response = { files, operations, eventCount, userCount };
404
+ }
405
+
406
+ else {
407
+ sLog(`DM4: job start (${replicate} workers)`, params);
408
+ const results = await spawn_file_workers(replicate, script, params);
409
+ response = results;
410
+ }
370
411
  }
371
412
  catch (e) {
372
- sLog("DM4: error", { error: e.message });
413
+ sLog("DM4: error", { error: e.message, stack: e.stack }, "ERROR");
373
414
  response = { error: e.message };
374
415
  res.status(500);
375
- await rm(writePath);
376
416
  }
417
+
377
418
  finally {
419
+ reqTimer.stop(false);
420
+ const { start, end, delta, human } = reqTimer.report(false);
421
+ if (!isReplica) {
422
+ sLog(`DM4: job end (${human})`, { human, delta, ...params, ...response });
423
+ }
424
+ if (isReplica) {
425
+ const eps = Math.floor(((response?.eventCount || 0) / delta) * 1000);
426
+ sLog(`DM4: worker end (${human})`, { human, delta, eps, ...params, ...response });
427
+ }
428
+ response = { ...response, start, end, delta, human, ...params };
378
429
  res.send(response);
430
+ return;
379
431
  }
380
432
  });
381
433
 
382
434
 
435
+ /**
436
+ * @typedef {import('mixpanel-import').ImportResults} ImportResults
437
+ */
438
/**
 * Fans one job out to `numberWorkers` replica invocations of this service
 * (at RUNTIME_URL) and tallies how many of them succeeded.
 *
 * Requests are staggered by 108ms per worker index and additionally gated by
 * a pLimit(CONCURRENCY) limiter.
 *
 * Success accounting: build_request() logs its own failures and resolves to an
 * empty object `{}` instead of rejecting, so a "fulfilled" promise alone does
 * not mean the worker answered. A job is counted as failed when its promise
 * rejects OR when it resolves to that empty-object sentinel.
 *
 * @param {number} numberWorkers how many replica requests to issue
 * @param {string} payload request body forwarded verbatim to every worker
 * @param {Object} params request parameters forwarded to build_request (e.g. runId)
 * @returns {Promise<{jobs_success: number, jobs_fail: number}>}
 */
async function spawn_file_workers(numberWorkers, payload, params) {
	const auth = new GoogleAuth();
	// a localhost target gets a plain client; anything else gets an ID-token client for RUNTIME_URL
	const client = RUNTIME_URL.includes('localhost')
		? await auth.getClient()
		: await auth.getIdTokenClient(RUNTIME_URL);

	const limit = pLimit(CONCURRENCY);
	const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));

	const requestPromises = Array.from({ length: numberWorkers }, async (_, position) => {
		const workerNumber = position + 1; // workers are numbered from 1
		await delay(workerNumber * 108); // stagger start-up so workers are not all summoned at once
		return limit(() => build_request(client, payload, workerNumber, params, numberWorkers));
	});
	const complete = await Promise.allSettled(requestPromises);

	// `{}` is what build_request resolves to after it has caught (and logged) an error
	const isFailureSentinel = (value) =>
		value !== null &&
		typeof value === "object" &&
		!Array.isArray(value) &&
		Object.keys(value).length === 0;

	const jobs_success = complete
		.filter((outcome) => outcome.status === "fulfilled" && !isFailureSentinel(outcome.value))
		.length;

	return {
		jobs_success,
		jobs_fail: complete.length - jobs_success
	};
}
463
+
464
+
465
/**
 * Summons a single replica worker by POSTing `payload` to RUNTIME_URL with
 * `replicate=1&is_replica=true&runId=...` in the query string.
 *
 * The query string is assembled with URLSearchParams so that the run id
 * (including the "no run id" fallback, which contains spaces) is always
 * percent-encoded and cannot corrupt the other parameters.
 *
 * This function never rejects: any error is logged at ERROR level and an
 * empty object is returned instead.
 *
 * @param {Object} client authenticated HTTP client; must expose `request(options)`
 * @param {string} payload request body sent as text/plain
 * @param {number} index 1-based worker number (used in log lines only)
 * @param {Object} params request parameters; `params.runId` is forwarded to the worker
 * @param {number} total total number of workers being summoned (used in log lines only)
 * @returns {Promise<Object>} the worker's response `data`, or `{}` on failure
 */
async function build_request(client, payload, index, params, total) {
	let retryAttempt = 0;
	sLog(`DM4: summoning worker #${index} of ${total}`, params);
	try {
		const target = new URL(RUNTIME_URL);
		target.searchParams.set("replicate", "1");
		target.searchParams.set("is_replica", "true");
		target.searchParams.set("runId", params.runId || "no run id");

		const req = await client.request({
			url: target.toString(),
			method: "POST",
			data: payload,
			headers: {
				"Content-Type": "text/plain",
			},
			timeout: 3600 * 1000 * 10, // milliseconds: 3600s * 10
			retryConfig: {
				retry: 3,
				onRetryAttempt: (error) => {
					const statusCode = error?.response?.status?.toString() || "";
					retryAttempt++;
					sLog(`DM4: summon worker ${index} retry #${retryAttempt}`, { statusCode, message: error.message, stack: error.stack, ...params }, "DEBUG");
				},
				retryDelay: 1000,
				// retry on dropped connections, server errors and rate limiting; nothing else
				shouldRetry: (error) => {
					if (error?.code === 'ECONNRESET') return true;
					const statusCode = error?.response?.status;
					return statusCode >= 500 || statusCode === 429;
				}
			},
		});
		sLog(`DM4: worker #${index} responded`, params);
		const { data } = req;
		return data;
	} catch (error) {
		sLog(`DM4: worker #${index} failed to respond`, { message: error.message, stack: error.stack, code: error.code, retries: retryAttempt, ...params }, "ERROR");
		return {};
	}
}
501
+
502
+
503
+
383
504
  /*
384
505
  ------
385
506
  MODELS
@@ -512,7 +633,7 @@ async function makeEvent(distinct_id, earliestTime, chosenEvent, anonymousIds, s
512
633
  else if (typeof (defaultProps[key]) === "object") {
513
634
  const obj = defaultProps[key];
514
635
  for (const subKey in obj) {
515
- if (Array.isArray(obj[subKey])) {
636
+ if (Array.isArray(obj[subKey])) {
516
637
  const subChoice = u.choose(obj[subKey]);
517
638
  if (Array.isArray(subChoice)) {
518
639
  for (const subSubChoice of subChoice) {
@@ -525,7 +646,7 @@ async function makeEvent(distinct_id, earliestTime, chosenEvent, anonymousIds, s
525
646
  }
526
647
  }
527
648
  else {
528
- if (!eventTemplate[subKey]) eventTemplate[subKey] = subChoice;
649
+ if (!eventTemplate[subKey]) eventTemplate[subKey] = subChoice;
529
650
  }
530
651
  }
531
652
  else {
@@ -1156,7 +1277,8 @@ async function sendToMixpanel(config, storage) {
1156
1277
  fixData: true,
1157
1278
  verbose: false,
1158
1279
  forceStream: true,
1159
- strict: false,
1280
+ strict: true, //false,
1281
+ epochEnd: dayjs().unix(), //is this chill?
1160
1282
  dryRun: false,
1161
1283
  abridged: false,
1162
1284
  fixJson: true,
@@ -1382,9 +1504,9 @@ function validateDungeonConfig(config) {
1382
1504
  // funnels
1383
1505
 
1384
1506
  // FUNNEL INFERENCE
1385
- if (!funnels || !funnels.length) {
1386
- funnels = inferFunnels(events);
1387
- }
1507
+ // if (!funnels || !funnels.length) {
1508
+ // funnels = inferFunnels(events);
1509
+ // }
1388
1510
 
1389
1511
  if (alsoInferFunnels) {
1390
1512
  const inferredFunnels = inferFunnels(events);
@@ -1491,13 +1613,21 @@ async function makeHookArray(arr = [], opts = {}) {
1491
1613
  if (existsSync(dataFolder)) writeDir = dataFolder;
1492
1614
  else writeDir = path.resolve("./");
1493
1615
 
1494
- if (NODE_ENV === "prod") writeDir = path.resolve(os.tmpdir());
1616
+ // ! decide where to write the files in prod
1617
+ if (NODE_ENV === "prod") {
1618
+ writeDir = path.resolve(os.tmpdir());
1619
+ }
1620
+ if (typeof rest?.config?.writeToDisk === "string" && rest?.config?.writeToDisk?.startsWith('gs://')) {
1621
+ writeDir = rest.config.writeToDisk;
1622
+ }
1495
1623
 
1496
1624
  function getWritePath() {
1497
1625
  if (isBATCH_MODE) {
1626
+ if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}-part-${batch.toString()}.${format}`;
1498
1627
  return path.join(writeDir, `${filepath}-part-${batch.toString()}.${format}`);
1499
1628
  }
1500
1629
  else {
1630
+ if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}.${format}`;
1501
1631
  return path.join(writeDir, `${filepath}.${format}`);
1502
1632
  }
1503
1633
  }
@@ -1559,6 +1689,7 @@ async function makeHookArray(arr = [], opts = {}) {
1559
1689
  }
1560
1690
  if (isBATCH_MODE) data.length = 0;
1561
1691
  return writeResult;
1692
+
1562
1693
  }
1563
1694
 
1564
1695
  async function flush() {
@@ -1703,8 +1834,8 @@ if (NODE_ENV !== "prod") {
1703
1834
  };
1704
1835
  if (bytes > 0) console.table(stats);
1705
1836
  if (Object.keys(data?.importResults || {}).length) {
1706
- log(`\nlog written to log.json\n`);
1707
- writeFileSync(path.resolve(folder, "log.json"), JSON.stringify(data?.importResults, null, 2));
1837
+ log(`\nlog written to log.json\n`);
1838
+ writeFileSync(path.resolve(folder, "log.json"), JSON.stringify(data?.importResults, null, 2));
1708
1839
  }
1709
1840
  // log(" " + files?.flat().join("\n "));
1710
1841
  log(`\n----------------SUMMARY-----------------\n\n\n`);