make-mp-data 2.0.22 → 2.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/entry.js CHANGED
@@ -27,6 +27,7 @@ import getCliParams from './lib/cli/cli.js';
   const simpleConfig = await import('./dungeons/simple.js');
   finalConfig = { ...simpleConfig.default, ...cliConfig };
  }
+
 
 
  const result = await main(finalConfig);
package/index.js CHANGED
@@ -194,8 +194,10 @@ async function main(config) {
   await generateCharts(context);
  }
 
- // Step 11a: Always flush lookup tables to disk (regardless of writeToDisk setting)
- await flushLookupTablesToDisk(storage, validatedConfig);
+ // Step 11a: flush lookup tables to disk (always as CSVs)
+ if (validatedConfig.writeToDisk) {
+  await flushLookupTablesToDisk(storage, validatedConfig);
+ }
 
  // Step 11b: Flush other storage containers to disk (if writeToDisk enabled)
  if (validatedConfig.writeToDisk) {
@@ -217,7 +219,7 @@ async function main(config) {
  return {
   ...extractedData,
   importResults,
-  files: extractFileInfo(storage),
+  files: await extractFileInfo(storage, validatedConfig),
   time: { start, end, delta, human },
   operations: context.getOperations(),
   eventCount: context.getEventCount(),
@@ -426,7 +428,7 @@ async function generateCharts(context) {
  if (config.makeChart && storage.eventData?.length > 0) {
   const chartPath = typeof config.makeChart === 'string'
    ? config.makeChart
-   : `./${config.simulationName}-timeline`;
+   : `./${config.name}-timeline`;
 
   await generateLineChart(storage.eventData, undefined, chartPath);
 
@@ -502,11 +504,13 @@ async function flushStorageToDisk(storage, config) {
 /**
  * Extract file information from storage containers
  * @param {import('./types').Storage} storage - Storage object
+ * @param {import('./types').Dungeon} config - Configuration object
  * @returns {string[]} Array of file paths
  */
-function extractFileInfo(storage) {
+async function extractFileInfo(storage, config) {
  const files = [];
 
+ // Try to get paths from containers first
  Object.values(storage).forEach(container => {
   if (Array.isArray(container)) {
    container.forEach(subContainer => {
@@ -519,6 +523,55 @@ function extractFileInfo(storage) {
   }
  });
 
+ // If no files found from containers and writeToDisk is enabled, scan the data directory
+ if (files.length === 0 && config.writeToDisk) {
+  try {
+   const fs = await import('fs');
+   const path = await import('path');
+
+   let dataDir = path.resolve("./data");
+   if (!fs.existsSync(dataDir)) {
+    dataDir = path.resolve("./");
+   }
+
+   if (fs.existsSync(dataDir)) {
+    const allFiles = fs.readdirSync(dataDir);
+    const simulationName = config.name;
+
+    // Filter files that match our patterns and were likely created by this run
+    const relevantFiles = allFiles.filter(file => {
+     // Skip system files
+     if (file.startsWith('.')) return false;
+
+     // If we have a simulation name, only include files with that prefix
+     if (simulationName && !file.startsWith(simulationName)) {
+      return false;
+     }
+
+     // Check for common patterns
+     const hasEventPattern = file.includes('-EVENTS.');
+     const hasUserPattern = file.includes('-USERS.');
+     const hasScdPattern = file.includes('-SCD.');
+     const hasGroupPattern = file.includes('-GROUPS.');
+     const hasLookupPattern = file.includes('-LOOKUP.');
+     const hasAdspendPattern = file.includes('-ADSPEND.');
+     const hasMirrorPattern = file.includes('-MIRROR.');
+
+     return hasEventPattern || hasUserPattern || hasScdPattern ||
+      hasGroupPattern || hasLookupPattern || hasAdspendPattern || hasMirrorPattern;
+    });
+
+    // Convert to full paths
+    relevantFiles.forEach(file => {
+     files.push(path.join(dataDir, file));
+    });
+   }
+  } catch (error) {
+   // If scanning fails, just return empty array
+   console.warn('Warning: Could not scan data directory for files:', error.message);
+  }
+ }
+
  return files;
 }
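Net effect of the index.js changes: files in the returned object is now resolved asynchronously, and when the storage containers report no paths but writeToDisk is enabled, it falls back to scanning ./data for files prefixed with the simulation name. A minimal consumer sketch; the import below assumes main() is the package's default export, so treat it as illustrative rather than documented API:

import makeMpData from 'make-mp-data'; // assumption: main() from index.js is the default export

const result = await makeMpData({
  numUsers: 100,
  numEvents: 10_000,
  writeToDisk: true // lookup tables (and everything else) only flush to disk when this is truthy
});

// Either the paths reported by the storage containers, or the <name>-EVENTS.* /
// <name>-USERS.* / <name>-*-LOOKUP.* files found by scanning ./data:
console.log(result.files);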
 
package/lib/cli/cli.js CHANGED
@@ -223,6 +223,12 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
   type: 'boolean',
   coerce: boolCoerce
  })
+ .option("name", {
+  alias: 'n',
+  demandOption: false,
+  describe: 'custom name for generated files (prefix)',
+  type: 'string'
+ })
 
  .help()
  .wrap(null)
@@ -236,7 +242,7 @@ DATA MODEL: https://github.com/ak--47/make-mp-data/blob/main/default.js
 }
 
 
-function boolCoerce(value, foo) {
+function boolCoerce(value) {
  if (typeof value === 'boolean') return value;
  if (typeof value === 'string') {
   return value.toLowerCase() === 'true';
@@ -136,9 +136,10 @@ export function validateDungeonConfig(config) {
   throw new Error("Either epochStart or numDays must be provided");
  }
 
- // Generate simulation name
- config.simulationName = name || makeName();
- config.name = config.simulationName;
+ // Use provided name if non-empty string, otherwise generate one
+ if (!name || name === "") {
+  name = makeName();
+ }
 
  // Validate events
  if (!events || !events.length) events = [{ event: "foo" }, { event: "bar" }, { event: "baz" }];
@@ -242,8 +243,7 @@ export function validateDungeonConfig(config) {
   hasAndroidDevices,
   hasDesktopDevices,
   hasIOSDevices,
-  simulationName: config.simulationName,
-  name: config.name
+  name
  };
 
  return validatedConfig;
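Taken together with the new --name / -n CLI flag above, the naming logic reduces to the sketch below; makeName() is the package's random-name helper, and the comment values are illustrative:

let name = config.name; // e.g. supplied on the CLI as --name acme
if (!name || name === "") {
  name = makeName(); // fall back to a generated simulation name
}
// Every output container then uses `name` as its filename prefix:
//   `${name}-EVENTS`, `${name}-USERS`, `${name}-${scdKey}-SCD`, `${name}-${lookupConfig.key}-LOOKUP`, ...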
@@ -40,6 +40,14 @@ function createDefaults(config, campaignData) {
  const weighedBrowsers = u.weighArray(devices.browsers);
  const weighedCampaigns = u.weighArray(campaignData);
 
+ // PERFORMANCE: Pre-compute device pools based on config to avoid rebuilding in makeEvent
+ const devicePools = {
+  android: config.hasAndroidDevices ? weighedAndroidDevices : [],
+  ios: config.hasIOSDevices ? weighedIOSDevices : [],
+  desktop: config.hasDesktopDevices ? weighedDesktopDevices : []
+ };
+ const allDevices = [...devicePools.android, ...devicePools.ios, ...devicePools.desktop];
+
  return {
   locationsUsers: () => weighedLocationsUsers,
   locationsEvents: () => weighedLocationsEvents,
@@ -47,7 +55,11 @@ function createDefaults(config, campaignData) {
   androidDevices: () => weighedAndroidDevices,
   desktopDevices: () => weighedDesktopDevices,
   browsers: () => weighedBrowsers,
-  campaigns: () => weighedCampaigns
+  campaigns: () => weighedCampaigns,
+
+  // PERFORMANCE: Pre-computed device pools
+  devicePools,
+  allDevices
  };
 }
 
@@ -65,13 +65,15 @@ export async function createHookArray(arr = [], opts) {
  }
 
  function getWritePath() {
+  const gzipSuffix = (config.gzip && !writeDir?.startsWith('gs://')) ? '.gz' : '';
+
   if (isBatchMode) {
-   if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}-part-${batch.toString()}.${format}`;
-   return path.join(writeDir, `${filepath}-part-${batch.toString()}.${format}`);
+   if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}-part-${batch.toString()}.${format}${gzipSuffix}`;
+   return path.join(writeDir, `${filepath}-part-${batch.toString()}.${format}${gzipSuffix}`);
  }
  else {
-   if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}.${format}`;
-   return path.join(writeDir, `${filepath}.${format}`);
+   if (writeDir?.startsWith('gs://')) return `${writeDir}/${filepath}.${format}${gzipSuffix}`;
+   return path.join(writeDir, `${filepath}.${format}${gzipSuffix}`);
  }
 }
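The suffix rule in getWritePath() can be restated on its own; this is an illustrative re-statement, not package API, and the example paths are made up:

function gzipSuffixFor(writeDir, gzip) {
  // gs:// targets skip the ".gz" suffix because the GCS write stream already gzips the object
  return gzip && !writeDir?.startsWith('gs://') ? '.gz' : '';
}

gzipSuffixFor('./data', true);      // '.gz' → e.g. ./data/acme-EVENTS-part-2.csv.gz in batch mode
gzipSuffixFor('./data', false);     // ''    → ./data/acme-EVENTS.csv
gzipSuffixFor('gs://bucket', true); // ''    → gs://bucket/acme-EVENTS.csv (compressed by the stream)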
 
@@ -153,12 +155,19 @@ export async function createHookArray(arr = [], opts) {
   console.log(`\n\twriting ${writePath}\n`);
  }
 
+ const streamOptions = {
+  gzip: config.gzip || false
+ };
+
  switch (format) {
   case "csv":
-   writeResult = await u.streamCSV(writePath, data);
+   writeResult = await u.streamCSV(writePath, data, streamOptions);
    break;
   case "json":
-   writeResult = await u.streamJSON(writePath, data);
+   writeResult = await u.streamJSON(writePath, data, streamOptions);
+   break;
+  case "parquet":
+   writeResult = await u.streamParquet(writePath, data, streamOptions);
    break;
   default:
    throw new Error(`format ${format} is not supported`);
@@ -219,12 +228,15 @@ export class StorageManager {
  async initializeContainers() {
   const { config } = this.context;
 
+  // Validate configuration for potential data loss scenarios
+  this.validateConfiguration(config);
+
   /** @type {Storage} */
   const storage = {
    eventData: await createHookArray([], {
     hook: config.hook,
     type: "event",
-    filepath: `${config.simulationName || 'events'}-EVENTS`,
+    filepath: `${config.name}-EVENTS`,
     format: config.format || "csv",
     concurrency: config.concurrency || 1,
     context: this.context
@@ -233,7 +245,7 @@ export class StorageManager {
    userProfilesData: await createHookArray([], {
     hook: config.hook,
     type: "user",
-    filepath: `${config.simulationName || 'users'}-USERS`,
+    filepath: `${config.name}-USERS`,
     format: config.format || "csv",
     concurrency: config.concurrency || 1,
     context: this.context
@@ -242,7 +254,7 @@ export class StorageManager {
    adSpendData: await createHookArray([], {
     hook: config.hook,
     type: "ad-spend",
-    filepath: `${config.simulationName || 'adspend'}-ADSPEND`,
+    filepath: `${config.name}-ADSPEND`,
     format: config.format || "csv",
     concurrency: config.concurrency || 1,
     context: this.context
@@ -255,7 +267,7 @@ export class StorageManager {
    mirrorEventData: await createHookArray([], {
     hook: config.hook,
     type: "mirror",
-    filepath: `${config.simulationName || 'mirror'}-MIRROR`,
+    filepath: `${config.name}-MIRROR`,
     format: config.format || "csv",
     concurrency: config.concurrency || 1,
     context: this.context
@@ -268,7 +280,7 @@ export class StorageManager {
   const scdArray = await createHookArray([], {
    hook: config.hook,
    type: "scd",
-   filepath: `${config.simulationName || 'scd'}-${scdKey}-SCD`,
+   filepath: `${config.name}-${scdKey}-SCD`,
    format: config.format || "csv",
    concurrency: config.concurrency || 1,
    context: this.context
@@ -284,7 +296,7 @@ export class StorageManager {
   const groupArray = await createHookArray([], {
    hook: config.hook,
    type: "group",
-   filepath: `${config.simulationName || 'groups'}-${groupKey}-GROUPS`,
+   filepath: `${config.name}-${groupKey}-GROUPS`,
    format: config.format || "csv",
    concurrency: config.concurrency || 1,
    context: this.context
@@ -300,7 +312,7 @@ export class StorageManager {
   const lookupArray = await createHookArray([], {
    hook: config.hook,
    type: "lookup",
-   filepath: `${config.simulationName || 'lookup'}-${lookupConfig.key}-LOOKUP`,
+   filepath: `${config.name}-${lookupConfig.key}-LOOKUP`,
    format: "csv", // Always force CSV for lookup tables
    concurrency: config.concurrency || 1,
    context: this.context
@@ -312,4 +324,24 @@ export class StorageManager {
 
   return storage;
  }
+
+ /**
+  * Validates configuration to prevent data loss scenarios
+  * @param {Object} config - Configuration object
+  */
+ validateConfiguration(config) {
+  // Check for potential data loss scenario: writeToDisk=false with low batchSize
+  if (config.writeToDisk === false) {
+   const batchSize = config.batchSize || 1_000_000;
+   const numEvents = config.numEvents || 0;
+
+   if (batchSize < numEvents) {
+    throw new Error(
+     `Configuration error: writeToDisk is explicitly set to false but batchSize (${batchSize}) is lower than numEvents (${numEvents}). ` +
+     `This would result in data loss as batched data would be discarded. ` +
+     `Either set writeToDisk to true, increase batchSize to be >= numEvents, or provide a Mixpanel token to send data directly.`
+    );
+   }
+  }
+ }
 }
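The new guard in StorageManager reads as a simple rule over writeToDisk, batchSize, and numEvents; the configs below are illustrative:

// Flushes each batch to disk as it fills — fine.
const ok = { writeToDisk: true, batchSize: 50_000, numEvents: 1_000_000 };

// Keeps everything in one in-memory batch, so nothing is ever discarded — fine.
const alsoOk = { writeToDisk: false, batchSize: 2_000_000, numEvents: 1_000_000 };

// Batches would be dropped with nowhere to go — initializeContainers() now throws.
const rejected = { writeToDisk: false, batchSize: 50_000, numEvents: 1_000_000 };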
@@ -72,7 +72,6 @@ export async function makeEvent(
  };
 
  let defaultProps = {};
- let devicePool = [];
 
  // Add default properties based on configuration
  if (hasLocation) {
@@ -82,32 +81,30 @@ export async function makeEvent(
  if (hasBrowser) {
   defaultProps.browser = u.choose(defaults.browsers());
  }
-
- // Build device pool based on enabled device types
- if (hasAndroidDevices) devicePool.push(defaults.androidDevices());
- if (hasIOSDevices) devicePool.push(defaults.iOSDevices());
- if (hasDesktopDevices) devicePool.push(defaults.desktopDevices());
 
  // Add campaigns with attribution likelihood
  if (hasCampaigns && chance.bool({ likelihood: 25 })) {
   defaultProps.campaigns = u.pickRandom(defaults.campaigns());
  }
 
- // Select device from pool
- const devices = devicePool.flat();
- if (devices.length) {
-  defaultProps.device = u.pickRandom(devices);
+ // PERFORMANCE: Use pre-computed device pool instead of rebuilding every time
+ if (defaults.allDevices.length) {
+  defaultProps.device = u.pickRandom(defaults.allDevices);
  }
 
  // Set event time using TimeSoup for realistic distribution
  if (earliestTime) {
   if (isFirstEvent) {
-   // Apply time shift to move to present day using precomputed value
-   eventTemplate.time = dayjs.unix(earliestTime).add(context.TIME_SHIFT_SECONDS, 'seconds').toISOString();
+   // PERFORMANCE: Direct numeric calculation instead of dayjs object creation
+   const shiftedTimestamp = earliestTime + context.TIME_SHIFT_SECONDS;
+   eventTemplate.time = dayjs.unix(shiftedTimestamp).toISOString();
  } else {
-   // Get time from TimeSoup and apply precomputed time shift
+   // Get time from TimeSoup (returns ISO string) and apply precomputed time shift
   const soupTime = u.TimeSoup(earliestTime, context.FIXED_NOW, peaks, deviation, mean);
-   eventTemplate.time = dayjs(soupTime).add(context.TIME_SHIFT_SECONDS, 'seconds').toISOString();
+   // PERFORMANCE: Parse ISO directly to milliseconds, add shift, convert back to ISO with one dayjs call
+   const soupTimestamp = new Date(soupTime).getTime() / 1000; // Convert to unix seconds
+   const shiftedTimestamp = soupTimestamp + context.TIME_SHIFT_SECONDS;
+   eventTemplate.time = dayjs.unix(shiftedTimestamp).toISOString();
  }
 }
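The time-shift rewrite in makeEvent leans on a simple identity: adding seconds to a unix timestamp before constructing the dayjs object yields the same instant as constructing first and calling .add(). A self-contained check (values are arbitrary):

import dayjs from 'dayjs';

const earliestTime = 1_700_000_000;      // unix seconds
const TIME_SHIFT_SECONDS = 30 * 86_400;  // shift the simulated window forward 30 days

const viaAdd  = dayjs.unix(earliestTime).add(TIME_SHIFT_SECONDS, 'seconds').toISOString();
const viaMath = dayjs.unix(earliestTime + TIME_SHIFT_SECONDS).toISOString();
// viaAdd === viaMath — the rewritten path just skips one intermediate dayjs object per event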
 
@@ -133,7 +130,9 @@ export async function makeEvent(
  // PERFORMANCE: Process properties directly without creating intermediate object
  // Add custom properties from event configuration
  if (chosenEvent.properties) {
-  for (const key in chosenEvent.properties) {
+  const eventKeys = Object.keys(chosenEvent.properties);
+  for (let i = 0; i < eventKeys.length; i++) {
+   const key = eventKeys[i];
    try {
     eventTemplate[key] = u.choose(chosenEvent.properties[key]);
    } catch (e) {
@@ -145,7 +144,9 @@
 
  // Add super properties (override event properties if needed)
  if (superProps) {
-  for (const key in superProps) {
+  const superKeys = Object.keys(superProps);
+  for (let i = 0; i < superKeys.length; i++) {
+   const key = superKeys[i];
    try {
     eventTemplate[key] = u.choose(superProps[key]);
    } catch (e) {
@@ -128,7 +128,13 @@ function buildFunnelEvents(context, sequence, chosenFunnelProps) {
 
  return sequence.map((eventName) => {
   const foundEvent = config.events?.find((e) => e.event === eventName);
-  const eventSpec = u.deepClone(foundEvent) || { event: eventName, properties: {} };
+
+  // PERFORMANCE: Shallow copy instead of deepClone for better performance
+  // We only need to copy the top-level structure since we're rebuilding properties anyway
+  const eventSpec = foundEvent ? {
+   event: foundEvent.event,
+   properties: { ...foundEvent.properties }
+  } : { event: eventName, properties: {} };
 
   // Process event properties
   for (const key in eventSpec.properties) {
@@ -139,11 +145,7 @@ function buildFunnelEvents(context, sequence, chosenFunnelProps) {
    }
   }
 
-  // Clean up funnel-specific properties
-  delete eventSpec.isFirstEvent;
-  delete eventSpec.weight;
-
-  // Merge funnel properties
+  // Merge funnel properties (no need to delete properties since we're creating a new object)
   eventSpec.properties = { ...eventSpec.properties, ...chosenFunnelProps };
 
   return eventSpec;
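The shallow copy in buildFunnelEvents is safe because of how the copied object is used afterwards; a small sketch of the semantics being relied on (plain JavaScript, no package code):

const foundEvent = { event: 'sign up', weight: 5, properties: { plan: ['free', 'pro'] } };

const eventSpec = {
  event: foundEvent.event,
  properties: { ...foundEvent.properties } // copies one level; nested values are still shared references
};

eventSpec.properties.plan = 'pro'; // re-assigning a key never touches foundEvent
// Funnel-only fields such as weight (and isFirstEvent) are simply never copied,
// which is why the old delete calls are no longer needed.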
@@ -8,6 +8,7 @@ import utc from 'dayjs/plugin/utc.js';
 import path from 'path';
 import { mkdir, parseGCSUri } from 'ak-tools';
 import { existsSync } from 'fs';
+import zlib from 'zlib';
 dayjs.extend(utc);
 import 'dotenv/config';
 import { domainSuffix, domainPrefix } from '../templates/defaults.js';
@@ -483,15 +484,22 @@ STREAMERS
 ----
 */
 
-function streamJSON(filePath, data) {
+function streamJSON(filePath, data, options = {}) {
  return new Promise((resolve, reject) => {
   let writeStream;
+  const { gzip = false } = options;
+
   if (filePath?.startsWith('gs://')) {
    const { uri, bucket, file } = parseGCSUri(filePath);
    writeStream = storage.bucket(bucket).file(file).createWriteStream({ gzip: true });
  }
  else {
   writeStream = fs.createWriteStream(filePath, { encoding: 'utf8' });
+  if (gzip) {
+   const gzipStream = zlib.createGzip();
+   gzipStream.pipe(writeStream);
+   writeStream = gzipStream;
+  }
  }
  data.forEach(item => {
   writeStream.write(JSON.stringify(item) + '\n');
@@ -504,15 +512,22 @@ function streamJSON(filePath, data) {
  });
 }
 
-function streamCSV(filePath, data) {
+function streamCSV(filePath, data, options = {}) {
  return new Promise((resolve, reject) => {
   let writeStream;
+  const { gzip = false } = options;
+
   if (filePath?.startsWith('gs://')) {
    const { uri, bucket, file } = parseGCSUri(filePath);
    writeStream = storage.bucket(bucket).file(file).createWriteStream({ gzip: true });
  }
  else {
   writeStream = fs.createWriteStream(filePath, { encoding: 'utf8' });
+  if (gzip) {
+   const gzipStream = zlib.createGzip();
+   gzipStream.pipe(writeStream);
+   writeStream = gzipStream;
+  }
  }
 
  // Extract all unique keys from the data array
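For local files, streamJSON and streamCSV now write into a Gzip transform that pipes into the file stream. The same pattern in isolation (the output path is illustrative):

import fs from 'fs';
import zlib from 'zlib';

const fileStream = fs.createWriteStream('./example-EVENTS.json.gz');
const gzip = zlib.createGzip();
gzip.pipe(fileStream); // compressed bytes flow into the file

[{ event: 'foo' }, { event: 'bar' }].forEach(row => {
  gzip.write(JSON.stringify(row) + '\n'); // newline-delimited JSON, as in streamJSON
});
gzip.end(); // flush the compressor and finish the pipeline
fileStream.on('finish', () => console.log('wrote gzipped NDJSON'));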
@@ -539,6 +554,117 @@ function streamCSV(filePath, data) {
  });
 }
 
+async function streamParquet(filePath, data, options = {}) {
+ const { gzip = false } = options;
+
+ // Dynamically import hyparquet-writer
+ const { parquetWriteFile, parquetWriteBuffer } = await import('hyparquet-writer');
+
+ if (data.length === 0) {
+  throw new Error('Cannot write parquet file with empty data');
+ }
+
+ // Extract column names and data from the input array
+ const columns = getUniqueKeys(data);
+ const columnData = columns.map(columnName => {
+  const columnValues = data.map(row => {
+   let value = row[columnName];
+
+   // Handle null/undefined values
+   if (value === null || value === undefined) {
+    return null;
+   }
+
+   // Convert objects to strings
+   if (typeof value === 'object') {
+    value = JSON.stringify(value);
+   }
+
+   return value;
+  });
+
+  // Determine the type based on the first non-null value
+  let type = 'STRING'; // default
+  const firstValue = columnValues.find(v => v !== null && v !== undefined);
+
+  if (firstValue !== undefined) {
+   if (typeof firstValue === 'boolean') {
+    type = 'BOOLEAN';
+   } else if (typeof firstValue === 'number') {
+    // For parquet compatibility, convert numbers to appropriate types
+    if (Number.isInteger(firstValue)) {
+     // Use INT32 for smaller integers, convert to BigInt for INT64 if needed
+     if (firstValue >= -2147483648 && firstValue <= 2147483647) {
+      type = 'INT32';
+     } else {
+      type = 'INT64';
+      // Convert all values to BigInt for INT64
+      for (let i = 0; i < columnValues.length; i++) {
+       if (columnValues[i] !== null && columnValues[i] !== undefined) {
+        columnValues[i] = BigInt(columnValues[i]);
+       }
+      }
+     }
+    } else {
+     type = 'DOUBLE';
+    }
+   } else if (firstValue instanceof Date) {
+    type = 'TIMESTAMP';
+   }
+  }
+
+  return {
+   name: columnName,
+   data: columnValues,
+   type: type
+  };
+ });
+
+ if (filePath?.startsWith('gs://')) {
+  // For GCS, write to buffer first, then upload
+  const arrayBuffer = parquetWriteBuffer({ columnData });
+  const { bucket, file } = parseGCSUri(filePath);
+
+  const writeStream = storage.bucket(bucket).file(file).createWriteStream({
+   gzip: gzip || true // Always gzip for GCS
+  });
+
+  return new Promise((resolve, reject) => {
+   writeStream.write(Buffer.from(arrayBuffer));
+   writeStream.end();
+   writeStream.on('finish', () => resolve(filePath));
+   writeStream.on('error', reject);
+  });
+ } else {
+  // For local files
+  let actualFilePath = filePath;
+  if (gzip && !filePath.endsWith('.gz')) {
+   actualFilePath = filePath + '.gz';
+  }
+
+  if (gzip) {
+   // Write to buffer then gzip to disk
+   const arrayBuffer = parquetWriteBuffer({ columnData });
+   const buffer = Buffer.from(arrayBuffer);
+   const gzippedBuffer = zlib.gzipSync(buffer);
+
+   return new Promise((resolve, reject) => {
+    fs.writeFile(actualFilePath, gzippedBuffer, (err) => {
+     if (err) reject(err);
+     else resolve(actualFilePath);
+    });
+   });
+  } else {
+   // Direct write to disk
+   parquetWriteFile({
+    filename: filePath,
+    columnData
+   });
+   return Promise.resolve(filePath);
+  }
+ }
+}
+
 
 /*
 ----
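streamParquet is also added to the utils export list (see the export hunk below), so callers use it like streamCSV and streamJSON. A hedged usage sketch with made-up rows and paths:

const rows = [
  { event: 'sign up', value: 1, time: '2024-01-01T00:00:00.000Z' },
  { event: 'purchase', value: 42, time: '2024-01-02T00:00:00.000Z' }
];

// Plain parquet file; column types are inferred from the first non-null value in each column.
await streamParquet('./data/demo-EVENTS.parquet', rows, { gzip: false });

// With gzip, the helper compresses the buffer itself and appends ".gz" when the path lacks it:
await streamParquet('./data/demo-EVENTS.parquet', rows, { gzip: true });
// => ./data/demo-EVENTS.parquet.gz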
@@ -870,7 +996,7 @@ function buildFileNames(config) {
  let extension = "";
  extension = format === "csv" ? "csv" : "json";
  // const current = dayjs.utc().format("MM-DD-HH");
- let simName = config.simulationName;
+ let simName = config.name;
  let writeDir = typeof config.writeToDisk === 'string' ? config.writeToDisk : "./";
  if (config.writeToDisk) {
   const dataFolder = path.resolve("./data");
@@ -1328,7 +1454,7 @@ export {
  TimeSoup,
  companyName,
  generateEmoji,
- hasSameKeys as haveSameKeys,
+ hasSameKeys,
  deepClone,
  initChance,
  getChance,
@@ -1362,6 +1488,7 @@ export {
  buildFileNames,
  streamJSON,
  streamCSV,
+ streamParquet,
  datesBetween,
  weighChoices,
  wrapFunc,
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
  "name": "make-mp-data",
- "version": "2.0.22",
+ "version": "2.0.23",
  "description": "builds all mixpanel primitives for a given project",
  "type": "module",
  "main": "index.js",
@@ -70,6 +70,7 @@
  "dayjs": "^1.11.11",
  "dotenv": "^16.4.5",
  "google-auth-library": "^9.15.0",
+ "hyparquet-writer": "^0.6.1",
  "mixpanel-import": "^2.8.162",
  "p-limit": "^3.1.0",
  "yargs": "^17.7.2"
package/types.d.ts CHANGED
@@ -20,7 +20,7 @@ export interface Dungeon {
  epochEnd?: number;
  numEvents?: number;
  numUsers?: number;
- format?: "csv" | "json" | string;
+ format?: "csv" | "json" | "parquet" | string;
  region?: "US" | "EU";
  concurrency?: number;
  batchSize?: number;
@@ -30,7 +30,6 @@ export interface Dungeon {
  projectId?: string;
 
  // ids
- simulationName?: string;
  name?: string;
 
  //switches
@@ -44,6 +43,7 @@ export interface Dungeon {
  hasDesktopDevices?: boolean;
  hasBrowser?: boolean;
  writeToDisk?: boolean | string;
+ gzip?: boolean;
  verbose?: boolean;
  hasAnonIds?: boolean;
  hasSessionIds?: boolean;
@@ -117,7 +117,7 @@ export interface hookArrayOptions<T> {
  hook?: Hook<T>;
  type?: hookTypes;
  filename?: string;
- format?: "csv" | "json" | string;
+ format?: "csv" | "json" | "parquet" | string;
  concurrency?: number;
  context?: Context;
  [key: string]: any;
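Pulling the type changes together, a hedged example of a Dungeon config exercising the 2.0.23 additions (field names follow the interface above; values are illustrative):

const dungeon = {
  name: 'acme',         // replaces simulationName; also settable on the CLI via --name / -n
  numUsers: 500,
  numEvents: 100_000,
  format: 'parquet',    // newly accepted alongside "csv" and "json"
  gzip: true,           // local output gains a ".gz" suffix
  writeToDisk: true,    // lookup tables only flush to disk when this is truthy now
  batchSize: 1_000_000  // must stay >= numEvents if writeToDisk were ever set to false
};

export default dungeon;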