make-mp-data 3.0.3 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/README.md +46 -0
  2. package/dungeons/array-of-object-lookup-schema.json +327 -0
  3. package/dungeons/array-of-object-lookup.js +29 -9
  4. package/dungeons/capstone/capstone-ic3.js +291 -0
  5. package/dungeons/capstone/capstone-ic4.js +598 -0
  6. package/dungeons/capstone/capstone-ic5.js +668 -0
  7. package/dungeons/capstone/generate-product-lookup.js +309 -0
  8. package/dungeons/ecommerce-schema.json +462 -0
  9. package/dungeons/{copilot.js → ecommerce.js} +79 -17
  10. package/dungeons/education-schema.json +2409 -0
  11. package/dungeons/education.js +226 -462
  12. package/dungeons/fintech-schema.json +14034 -0
  13. package/dungeons/fintech.js +134 -413
  14. package/dungeons/foobar-schema.json +403 -0
  15. package/dungeons/foobar.js +27 -4
  16. package/dungeons/food-delivery-schema.json +192 -0
  17. package/dungeons/food-delivery.js +602 -0
  18. package/dungeons/food-schema.json +1152 -0
  19. package/dungeons/food.js +173 -406
  20. package/dungeons/gaming-schema.json +1270 -0
  21. package/dungeons/gaming.js +182 -42
  22. package/dungeons/insurance-application-schema.json +204 -0
  23. package/dungeons/insurance-application.js +605 -0
  24. package/dungeons/media-schema.json +906 -0
  25. package/dungeons/media.js +250 -420
  26. package/dungeons/retention-cadence-schema.json +78 -0
  27. package/dungeons/retention-cadence.js +35 -1
  28. package/dungeons/rpg-schema.json +4526 -0
  29. package/dungeons/rpg.js +171 -429
  30. package/dungeons/sanity-schema.json +255 -0
  31. package/dungeons/sanity.js +21 -10
  32. package/dungeons/sass-schema.json +1291 -0
  33. package/dungeons/sass.js +241 -368
  34. package/dungeons/scd-schema.json +919 -0
  35. package/dungeons/scd.js +41 -13
  36. package/dungeons/simple-schema.json +608 -0
  37. package/dungeons/simple.js +52 -15
  38. package/dungeons/simplest-schema.json +1418 -0
  39. package/dungeons/simplest.js +392 -0
  40. package/dungeons/social-schema.json +1118 -0
  41. package/dungeons/social.js +150 -391
  42. package/dungeons/text-generation-schema.json +3096 -0
  43. package/dungeons/text-generation.js +71 -0
  44. package/index.js +8 -6
  45. package/lib/core/config-validator.js +28 -8
  46. package/lib/core/storage.js +5 -5
  47. package/lib/generators/events.js +4 -4
  48. package/lib/orchestrators/mixpanel-sender.js +16 -13
  49. package/lib/orchestrators/user-loop.js +14 -6
  50. package/lib/templates/soup-presets.js +188 -0
  51. package/lib/utils/utils.js +52 -6
  52. package/package.json +1 -1
  53. package/types.d.ts +20 -3
  54. package/dungeons/adspend.js +0 -130
  55. package/dungeons/anon.js +0 -128
  56. package/dungeons/benchmark-heavy.js +0 -240
  57. package/dungeons/benchmark-light.js +0 -140
  58. package/dungeons/big.js +0 -226
  59. package/dungeons/business.js +0 -391
  60. package/dungeons/complex.js +0 -428
  61. package/dungeons/experiments.js +0 -137
  62. package/dungeons/funnels.js +0 -309
  63. package/dungeons/mil.js +0 -323
  64. package/dungeons/mirror.js +0 -161
  65. package/dungeons/soup-test.js +0 -52
  66. package/dungeons/streaming.js +0 -372
  67. package/dungeons/strict-event-test.js +0 -30
  68. package/dungeons/student-teacher.js +0 -438
  69. package/dungeons/too-big-events.js +0 -203
  70. package/dungeons/user-agent.js +0 -209
@@ -0,0 +1,668 @@
1
+ /**
2
+ * ═══════════════════════════════════════════════════════════════
3
+ * DATASET OVERVIEW
4
+ * ═══════════════════════════════════════════════════════════════
5
+ *
6
+ * IC5 Capstone — advanced enterprise SaaS for Mixpanel certification.
7
+ * - Complex events with deeply nested metadata (server, request, response)
8
+ * - Text generation for support tickets and social posts
9
+ * - Session computation with duration_ms, written to external JSONL
10
+ * - Key name remapping (event→action, time→occurred_at, etc.)
11
+ * - Profile cleanup (removes anonymousIds, sessionIds)
12
+ *
13
+ * ═══════════════════════════════════════════════════════════════
14
+ * ANALYTICS HOOKS (1 pattern — everything hook)
15
+ * ═══════════════════════════════════════════════════════════════
16
+ *
17
+ * 1. DATA TRANSFORMATION + DUPLICATION (everything hook)
18
+ * - Renames keys: event→action, time→occurred_at, device_id→client_id, user_id→auth_uuid
19
+ * - Cleans profile: removes anonymousIds, sessionIds
20
+ * - Computes session aggregates with duration and writes to external file
21
+ * - Removes insert_id and session_id from events
22
+ * - Duplicates 2-10% of events per user (proportional to event count)
23
+ * Students must work with non-standard key names and handle duplicates.
24
+ */
25
+
26
+ import Chance from 'chance';
27
+ const chance = new Chance();
28
+ import { weighNumRange, date, integer } from "../brain/utils/utils.js";
29
+ import * as u from 'ak-tools';
30
+ import fs from 'fs';
31
+ import path from 'path';
32
+ import { fileURLToPath } from 'url';
33
+
34
+ import { createTextGenerator, generateBatch } from "../brain/generators/text.js";
35
+ import { PHRASE_BANK } from "../brain/templates/phrases.js";
36
+ import dayjs from 'dayjs';
37
+
38
+ // create this file if it doesn't exist
39
+ const __filename = fileURLToPath(import.meta.url);
40
+ const __dirname = path.dirname(__filename);
41
+ const sessionsFile = path.join(__dirname, '../data/generated_sessions.jsonl');
42
+
43
+ // Create the sessions file if it doesn't exist (JSON Lines format)
44
+ if (!fs.existsSync(sessionsFile)) {
45
+ fs.writeFileSync(sessionsFile, '', 'utf8');
46
+ }
47
+
48
+
49
+ const enterpriseSupportGen = createTextGenerator({
50
+ style: "support",
51
+ tone: "neg",
52
+ intensity: "high",
53
+ formality: "technical",
54
+ keywords: {
55
+ features: ['Dashboard Analytics', 'Export API', 'SSO Login', 'Admin Console', 'User Management', 'SAML Authentication', 'OAuth Integration', 'Multi-Factor Auth', 'Role-Based Access Control', 'Audit Logs', 'Webhook Configuration', 'Data Pipeline', 'Custom Reports', 'Scheduled Exports', 'Real-Time Sync', 'Bulk Import', 'API Rate Limits', 'Usage Metrics', 'Team Permissions', 'Single Sign-On', 'LDAP Integration', 'Data Retention Policies', 'Custom Domains', 'White-Label Branding', 'Advanced Filtering', 'Query Builder', 'Notification Rules', 'Email Templates', 'Workflow Automation', 'Integration Hub', 'Data Validation', 'Field Mapping', 'Schema Management', 'Version Control', 'Rollback Capability', 'Disaster Recovery', 'High Availability', 'Load Balancing', 'Auto-Scaling', 'Performance Monitoring'],
56
+ products: ['DataViz Pro', 'Enterprise Suite', 'v3.2.1', 'v2.8.4', 'Analytics Platform', 'Cloud Dashboard', 'v4.1.0', 'v3.9.2', 'Enterprise Edition', 'Professional Tier', 'Business Intelligence Module', 'Data Warehouse Connector', 'Mobile SDK', 'REST API v2', 'GraphQL Endpoint', 'Legacy Platform', 'Next-Gen Analytics', 'Premium Package', 'Ultimate Plan', 'Advanced Analytics Suite', 'Reporting Engine', 'Data Integration Platform', 'ETL Pipeline', 'Real-Time Analytics', 'Batch Processing Module', 'Stream Processing', 'Visualization Library', 'Dashboard Builder', 'Report Designer'],
57
+ technical: ['CORS error', 'timeout', 'memory leak', 'authentication', 'database', 'connection pooling', 'rate limiting', 'SSL handshake', 'DNS resolution', 'load balancer', 'cache invalidation', 'session timeout', 'deadlock detection', 'query optimization', 'index fragmentation', 'replication lag', 'connection refused', 'network latency', 'packet loss', 'firewall rules', 'proxy configuration', 'certificate expiration', 'API throttling', 'websocket disconnection', 'redis cluster', 'kafka consumer lag', 'database migration', 'schema validation', 'token refresh', 'OAuth flow', 'SAML assertion', 'JWT validation', 'API Gateway', 'reverse proxy', 'CDN distribution', 'geo-replication', 'data consistency', 'eventual consistency', 'distributed transactions', 'circuit breaker', 'retry logic', 'exponential backoff', 'service mesh', 'container orchestration', 'pod scheduling', 'horizontal scaling', 'vertical scaling', 'auto-discovery', 'health checks', 'graceful shutdown'],
58
+ errors: ['ERR_CONNECTION_REFUSED', '500 Internal Server', 'TIMEOUT_ERROR', 'AUTH_FAILED', '502 Bad Gateway', '503 Service Unavailable', '504 Gateway Timeout', '401 Unauthorized', '403 Forbidden', '429 Too Many Requests', 'ECONNRESET', 'ETIMEDOUT', 'ERR_SSL_PROTOCOL', 'DATABASE_CONNECTION_FAILED', 'INVALID_TOKEN', 'SESSION_EXPIRED', 'QUOTA_EXCEEDED', 'CORS_POLICY_VIOLATION', 'VALIDATION_ERROR', 'PERMISSION_DENIED', 'RESOURCE_NOT_FOUND', 'DUPLICATE_ENTRY', 'CONSTRAINT_VIOLATION', 'OUT_OF_MEMORY', 'DISK_FULL', 'MAX_CONNECTIONS_REACHED', 'DEADLOCK_DETECTED', 'TRANSACTION_ROLLBACK', 'REPLICATION_ERROR', 'CLUSTER_SPLIT_BRAIN', 'FAILOVER_TRIGGERED', 'BACKUP_FAILED', 'RESTORE_ERROR', 'MIGRATION_FAILED', 'SCHEMA_MISMATCH', 'VERSION_CONFLICT'],
59
+ competitors: ['Tableau', 'PowerBI', 'Looker', 'Qlik', 'Domo', 'Sisense', 'ThoughtSpot', 'Mode Analytics', 'Metabase', 'Redash', 'Chartio', 'Periscope Data', 'Google Data Studio', 'Amazon QuickSight', 'IBM Cognos', 'SAP Analytics', 'Oracle BI', 'MicroStrategy', 'Yellowfin', 'Birst', 'Alteryx', 'Dataiku', 'Databricks', 'Snowflake', 'BigQuery', 'Redshift', 'Azure Synapse', 'Splunk', 'New Relic', 'Datadog']
60
+ },
61
+ mixedSentiment: true,
62
+ authenticityLevel: 0.7,
63
+ typos: true,
64
+ typoRate: 0.02,
65
+ specificityLevel: 0.8,
66
+ min: 80,
67
+ max: 254,
68
+ includeMetadata: false,
69
+ });
70
+
71
+ // Search queries with realistic typos
72
+ const searchQueryGen = createTextGenerator({
73
+ style: "search",
74
+ tone: "neu",
75
+ formality: "casual",
76
+ keywords: {
77
+ features: ['export data', 'user settings', 'help docs', 'pricing', 'billing', 'account settings', 'password reset', 'two factor authentication', 'team management', 'permissions', 'API keys', 'webhooks', 'integrations', 'custom fields', 'bulk upload', 'import CSV', 'export PDF', 'download report', 'sharing options', 'collaboration', 'notifications', 'email preferences', 'mobile app', 'desktop app', 'browser extension', 'keyboard shortcuts', 'templates', 'automation', 'workflows', 'filters', 'search', 'sorting', 'grouping', 'charts', 'dashboards', 'analytics', 'reports', 'insights', 'trends', 'forecasting', 'benchmarks', 'goals', 'KPIs', 'metrics', 'data visualization', 'custom reports', 'scheduled reports', 'alerts', 'reminders', 'tasks', 'projects', 'files', 'attachments', 'comments', 'mentions', 'tags', 'labels', 'categories', 'folders', 'archive', 'trash', 'restore', 'backup', 'export', 'import', 'migrate', 'sync', 'offline access', 'dark theme', 'language settings', 'time zone', 'date format', 'currency', 'units', 'accessibility', 'privacy', 'security', 'compliance', 'audit trail', 'version history', 'changelog'],
78
+ technical: ['API documentation', 'integration guide', 'troubleshooting', 'SDK reference', 'REST API', 'GraphQL', 'authentication', 'OAuth', 'SSO setup', 'SAML', 'rate limits', 'error codes', 'status page', 'changelog', 'release notes', 'migration guide', 'upgrade path', 'deprecation notice', 'API versioning', 'webhooks setup', 'callback URL', 'payload format', 'request headers', 'response codes', 'pagination', 'filtering', 'sorting API', 'batch endpoints', 'async operations', 'retry logic', 'timeout settings', 'CORS configuration', 'IP whitelist', 'security best practices', 'code examples', 'sample requests', 'postman collection', 'curl commands', 'client libraries', 'npm packages', 'pip packages', 'gem install', 'maven dependency', 'nuget package', 'composer require', 'quickstart tutorial', 'getting started', 'hello world', 'first API call', 'authentication flow', 'token management', 'refresh tokens', 'scopes', 'permissions', 'roles', 'rate limit handling', 'error handling', 'best practices', 'common pitfalls', 'FAQ', 'support forum']
79
+ },
80
+ typos: true,
81
+ typoRate: 0.05,
82
+ min: 2,
83
+ max: 255,
84
+ includeMetadata: false
85
+ });
86
+
87
+ // Business feedback with professional tone
88
+ const businessFeedbackGen = createTextGenerator({
89
+ style: "feedback",
90
+ tone: "neu",
91
+ formality: "business",
92
+ keywords: {
93
+ metrics: ['ROI', 'efficiency', 'cost savings', 'productivity', 'revenue growth', 'profit margin', 'conversion rate', 'customer acquisition cost', 'lifetime value', 'churn rate', 'retention rate', 'engagement rate', 'adoption rate', 'time to value', 'operational efficiency', 'resource utilization', 'cost per transaction', 'revenue per user', 'gross margin', 'net promoter score', 'customer satisfaction', 'employee satisfaction', 'market share', 'growth rate', 'scalability', 'performance metrics', 'KPI achievement', 'goal attainment', 'benchmark comparison', 'industry standards', 'competitive advantage', 'process improvement', 'quality metrics', 'error rate', 'uptime SLA', 'response time SLA', 'mean time to resolution', 'first contact resolution', 'customer effort score', 'employee Net Promoter Score', 'revenue per employee', 'operating margin', 'EBITDA', 'cash flow', 'burn rate', 'runway', 'payback period', 'IRR', 'NPV', 'break-even analysis', 'variance analysis', 'trend analysis', 'cohort analysis', 'funnel conversion', 'pipeline velocity', 'win rate', 'average deal size', 'sales cycle length', 'quota attainment', 'forecast accuracy'],
94
+ features: ['reporting', 'analytics', 'integration capabilities', 'dashboard customization', 'automated workflows', 'data visualization', 'forecasting tools', 'predictive analytics', 'real-time monitoring', 'alert system', 'custom reports', 'scheduled reports', 'export functionality', 'API access', 'bulk operations', 'team collaboration', 'role-based access', 'audit trails', 'compliance features', 'data security', 'backup and recovery', 'scalability', 'mobile access', 'offline capabilities', 'third-party integrations', 'CRM integration', 'ERP integration', 'accounting software', 'payment gateways', 'marketing tools', 'customer support', 'training resources', 'documentation', 'professional services', 'dedicated support', 'white-glove onboarding', 'custom development', 'consulting services', 'implementation support', 'change management', 'data migration', 'system integration', 'workflow automation', 'process optimization', 'business intelligence', 'advanced analytics', 'machine learning', 'AI capabilities', 'natural language processing', 'sentiment analysis', 'anomaly detection', 'pattern recognition']
95
+ },
96
+ authenticityLevel: 0.3,
97
+ specificityLevel: 0.7,
98
+ min: 40,
99
+ max: 1000,
100
+ includeMetadata: false
101
+ });
102
+
103
+
104
+ /** @type {import('../types.js').Dungeon} */
105
+ const config = {
106
+ token: "",
107
+ seed: "this is going to be hard",
108
+ name: "ic5-capstone",
109
+ numDays: 180, //how many days worth of data
110
+ numEvents: 5_000_000, //how many events
111
+ numUsers: 25_000, //how many users
112
+ format: 'parquet', //output file format (e.g. csv or json; parquet here)
113
+ region: "US",
114
+ gzip: false,
115
+ hasAnonIds: true, //if true, anonymousIds are created for each user
116
+ hasSessionIds: true, //if true, sessionIds are created for each user
117
+
118
+ hasLocation: true,
119
+ hasAndroidDevices: false,
120
+ hasIOSDevices: false,
121
+ hasDesktopDevices: false,
122
+ hasBrowser: false,
123
+ hasCampaigns: false,
124
+ isAnonymous: false,
125
+ hasAdSpend: true,
126
+
127
+ hasAvatar: true,
128
+ makeChart: false,
129
+
130
+ batchSize: 250_000,
131
+ concurrency: 1,
132
+ writeToDisk: true,
133
+
134
+ funnels: [],
135
+ events: [
136
+ {
137
+ "event": "checkout",
138
+ "weight": 2,
139
+ "properties": {
140
+ currency: ["USD", "USD", "USD", "CAD", "EUR", "EUR", "BTC", "BTC", "ETH", "JPY"],
141
+ cart: makeProducts(),
142
+ }
143
+ },
144
+ {
145
+ "event": "add to cart",
146
+ "weight": 4,
147
+ "properties": {
148
+ amount: weighNumRange(5, 500, .25),
149
+ qty: integer(1, 5),
150
+ product_id: weighNumRange(1, 250000, 1.4)
151
+ }
152
+ },
153
+ {
154
+ "event": "page view",
155
+ "weight": 10,
156
+ "properties": {
157
+ page: ["/", "/", "/", "/learn-more", "/pricing", "/contact", "/about", "/careers", "/sign-up", "/login", "/app", "/app", "/app", "/app"],
158
+ utm_source: ["$organic", "$organic", "$organic", "$organic", "google", "google", "google", "facebook", "facebook", "twitter", "linkedin"],
159
+ }
160
+ },
161
+ {
162
+ event: "search",
163
+ weight: 1, // lowest weight in this list: one of the least frequent events
164
+ properties: {
165
+ query_text: () => searchQueryGen.generateOne(),
166
+ search_category: ["help", "documentation", "features", "pricing", "support"],
167
+ results_found: weighNumRange(0, 100, 0.3),
168
+ clicked_result: [true, true, false], // 67% click through
169
+ result_position_clicked: weighNumRange(1, 10, 0.8), // Most click top results
170
+ search_duration_seconds: weighNumRange(1, 300, 0.7),
171
+ refined_search: [true, false, false, false] // 25% refine search
172
+ }
173
+ },
174
+ {
175
+ event: "write feedback",
176
+ weight: 1,
177
+ properties: {
178
+ feedback_text: () => businessFeedbackGen.generateOne(),
179
+ satisfaction_score: weighNumRange(1, 10, 0.3),
180
+ feedback_category: ["feature_request", "improvement", "compliment", "concern"],
181
+ department: ["engineering", "sales", "marketing", "support", "executive"],
182
+ follow_up_requested: [true, false, false], // 33% want follow-up
183
+ urgency: ["low", "medium", "high"],
184
+ implementation_priority: weighNumRange(1, 5, 0.4)
185
+ }
186
+ },
187
+ {
188
+ "event": "watch video",
189
+ "weight": 8,
190
+ "properties": {
191
+ category: ["funny", "educational", "inspirational", "music", "news", "sports", "cooking", "DIY", "travel", "gaming"],
192
+ hashTags: makeHashTags,
193
+ watchTimeSec: weighNumRange(10, 600, .25,),
194
+ quality: ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"],
195
+ format: ["mp4", "avi", "mov", "mpg"],
196
+ video_id: weighNumRange(1, 500_000, 1.4),
197
+
198
+ }
199
+ },
200
+ {
201
+ "event": "comment",
202
+ "weight": 2,
203
+ "properties": {
204
+ length: weighNumRange(1, 500, .25),
205
+ video_id: weighNumRange(1, 500_000, 1.4),
206
+ has_replies: [true, false, false, false, false],
207
+ has_photo: [true, false, false, false, false],
208
+
209
+ }
210
+ },
211
+ {
212
+ "event": "save video",
213
+ "weight": 4,
214
+ "properties": {
215
+ video_id: weighNumRange(1, 500_000, 1.4),
216
+ ui_control: ["toolbar", "menu", "keyboard"]
217
+
218
+
219
+ }
220
+ },
221
+ {
222
+ "event": "view item",
223
+ "weight": 8,
224
+ "properties": {
225
+ product_id: weighNumRange(1, 250000, 3),
226
+ colors: ["light", "dark", "custom", "dark"]
227
+ }
228
+ },
229
+ {
230
+ "event": "save item",
231
+ "weight": 5,
232
+ "properties": {
233
+ product_id: weighNumRange(1, 250000, 12),
234
+ colors: ["light", "dark", "custom", "dark"]
235
+ }
236
+ },
237
+ {
238
+ "event": "support ticket",
239
+ "weight": 2,
240
+ "properties": {
241
+ severity: ["low", "medium", "high"],
242
+ ticket_id: chance.guid.bind(chance),
243
+ ticket_text: () => enterpriseSupportGen.generateOne()
244
+ }
245
+ },
246
+ {
247
+ "event": "sign up",
248
+ "isFirstEvent": true,
249
+ "weight": 0,
250
+ "properties": {
251
+ plan: ["free", "free", "free", "free", "basic", "basic", "basic", "premium", "premium", "enterprise"],
252
+ dateOfRenewal: date(100, false),
253
+ codewords: u.makeName,
254
+ }
255
+ }
256
+ ],
257
+ superProps: {
258
+ linked_device: deviceAttributes(),
259
+ server_metaData: generateMetaData()
260
+ },
261
+ /*
262
+ user properties work the same as event properties
263
+ each key should be an array or function reference
264
+ */
265
+ userProps: {
266
+ title: chance.profession.bind(chance),
267
+ luckyNumber: weighNumRange(42, 420),
268
+ experiment: designExperiment(),
269
+ spiritAnimal: ["unicorn", "dragon", "phoenix", "sasquatch", "yeti", "kraken", "jackalope", "thunderbird", "mothman", "nessie", "chupacabra", "jersey devil", "bigfoot", "weindgo", "bunyip", "mokele-mbembe", "tatzelwurm", "megalodon"],
270
+ timezone: chance.timezone.bind(chance), // ["America/New_York", "America/Los_Angeles", "America/Chicago", "America/Denver", "America/Phoenix", "America/Anchorage", "Pacific/Honolulu"]
271
+ ip: chance.ip.bind(chance),
272
+ // lastCart: makeProducts(5),
273
+
274
+ },
275
+
276
+ /** each generates its own table */
277
+ scdProps: {
278
+ role: {
279
+ type: "user",
280
+ frequency: "week",
281
+ values: ["admin", "collaborator", "user", "view only", "no access"],
282
+ timing: 'fuzzy',
283
+ max: 10
284
+ },
285
+ NPS: {
286
+ type: "user",
287
+ frequency: "day",
288
+ values: weighNumRange(1, 10, 2, 150),
289
+ timing: 'fuzzy',
290
+ max: 10
291
+ },
292
+ MRR: {
293
+ type: "company_id",
294
+ frequency: "month",
295
+ values: weighNumRange(0, 10000, .15),
296
+ timing: 'fixed',
297
+ max: 10
298
+ },
299
+ AccountHealthScore: {
300
+ type: "company_id",
301
+ frequency: "week",
302
+ values: weighNumRange(1, 10, .15),
303
+ timing: 'fixed',
304
+ max: 40
305
+ },
306
+ plan: {
307
+ type: "company_id",
308
+ frequency: "month",
309
+ values: ["free", "basic", "premium", "enterprise"],
310
+ timing: 'fixed',
311
+ max: 10
312
+ },
313
+
314
+ },
315
+
316
+ mirrorProps: {
317
+ },
318
+
319
+ /*
320
+ for group analytics keys, we need an array of arrays [[],[],[]]
321
+ each pair represents a group_key and the number of profiles for that key
322
+ */
323
+ groupKeys: [
324
+ ['company_id', 500, []],
325
+ ['room_id', 10000, ["save video", "comment", "watch video"]],
326
+
327
+ ],
328
+ groupProps: {
329
+ company_id: {
330
+ name: () => { return chance.company(); },
331
+ email: () => { return `CSM: ${chance.pickone(["AK", "Jessica", "Michelle", "Dana", "Brian", "Dave"])}`; },
332
+ "# of employees": weighNumRange(3, 10000),
333
+ "industry": ["tech", "finance", "healthcare", "education", "government", "non-profit"],
334
+ "segment": ["enterprise", "SMB", "mid-market"],
335
+ "products": [["core"], ["core"], ["core", "add-ons"], ["core", "pro-serve"], ["core", "add-ons", "pro-serve"], ["core", "BAA", "enterprise"], ["free"], ["free"], ["free", "addons"]],
336
+ },
337
+ room_id: {
338
+ name: () => { return `#${chance.word({ length: integer(4, 24), capitalize: true })}`; },
339
+ email: ["public", "private"],
340
+ "room provider": ["partner", "core", "core", "core"],
341
+ "room capacity": weighNumRange(3, 1000000),
342
+ "isPublic": [true, false, false, false, false],
343
+ "country": chance.country.bind(chance),
344
+ "isVerified": [true, true, false, false, false],
345
+ }
346
+ },
347
+ groupEvents: [{
348
+ attribute_to_user: false,
349
+ event: "card charged",
350
+ weight: 1,
351
+ frequency: 30,
352
+ group_key: "company_id",
353
+ group_size: 500,
354
+ properties: {
355
+ amount: weighNumRange(5, 500, .25),
356
+ currency: ["USD", "USD", "USD", "CAD", "EUR", "EUR", "BTC", "BTC", "ETH", "JPY"],
357
+ plan: ["basic", "premium", "enterprise"],
358
+ "payment method": []
359
+ }
360
+ }],
361
+
362
+ lookupTables: [
363
+ {
364
+ key: "video_id",
365
+ entries: 500_000,
366
+ attributes: {
367
+ isFlagged: [true, false, false, false, false],
368
+ copyright: ["all rights reserved", "creative commons", "creative commons", "public domain", "fair use"],
369
+ uploader_id: chance.guid.bind(chance),
370
+ "uploader influence": ["low", "low", "low", "medium", "medium", "high"],
371
+ thumbs: weighNumRange(0, 35),
372
+ rating: ["G", "PG", "PG-13", "R", "NC-17", "PG-13", "R", "NC-17", "R", "PG", "PG"]
373
+ }
374
+
375
+ }
376
+ ],
377
+
378
+ hook: function (record, type, meta) {
379
+
380
+ if (type === "everything") {
381
+ const changeKeyNames = [
382
+ ["event", "action"],
383
+ ["time", "occurred_at"],
384
+ ["device_id", "client_id"],
385
+ ["user_id", "auth_uuid"]
386
+ ]
387
+
388
+ const keysToRemove = [
389
+ "insert_id",
390
+ "session_id"
391
+ ]
392
+
393
+ const profileKeysToRemove = [
394
+ "anonymousIds",
395
+ "sessionIds"
396
+ ]
397
+
398
+ // remove unwanted keys from profile
399
+ profileKeysToRemove.forEach(key => {
400
+ if (meta.profile.hasOwnProperty(key)) {
401
+ delete meta.profile[key];
402
+ }
403
+ });
404
+
405
+ record.forEach(event => {
406
+ // change key names
407
+ changeKeyNames.forEach(pair => {
408
+ const [oldKey, newKey] = pair;
409
+ if (event.hasOwnProperty(oldKey)) {
410
+ event[newKey] = event[oldKey];
411
+ delete event[oldKey];
412
+ }
413
+ });
414
+ });
415
+
416
+ // get all unique session_id
417
+ const uniqueSessionIds = new Set();
418
+ record.forEach(event => {
419
+ if (event.hasOwnProperty("session_id")) {
420
+ uniqueSessionIds.add(event["session_id"]);
421
+ }
422
+ });
423
+
424
+ // compute the length of all sessions
425
+ const sessions = [];
426
+ uniqueSessionIds.forEach(sessId => {
427
+ const sessEvents = record.filter(event => event["session_id"] === sessId);
428
+
429
+ // Parse all timestamps with dayjs and sort
430
+ const timestamps = sessEvents
431
+ .map(e => dayjs(e.occurred_at))
432
+ .sort((a, b) => a.valueOf() - b.valueOf());
433
+
434
+ // First timestamp is earliest, last is latest
435
+ const startTime = timestamps[0];
436
+ const endTime = timestamps[timestamps.length - 1];
437
+
438
+ // Calculate duration in milliseconds
439
+ const durationMs = endTime.diff(startTime, 'second');
440
+
441
+ sessions.push({
442
+ session_id: sessId,
443
+ event_count: sessEvents.length,
444
+ start_time: startTime.toISOString(),
445
+ duration_ms: durationMs,
446
+ uuid: meta.profile.distinct_id
447
+ });
448
+ });
449
+
450
+ // append sessions to the file
451
+ if (sessions.length > 0) {
452
+ const sessionLines = sessions.map(s => JSON.stringify(s)).join('\n') + '\n';
453
+ fs.appendFileSync(sessionsFile, sessionLines, 'utf8');
454
+ }
455
+
456
+ // remove unwanted keys
457
+ record.forEach(event => {
458
+ keysToRemove.forEach(key => {
459
+ if (event.hasOwnProperty(key)) {
460
+ delete event[key];
461
+ }
462
+ });
463
+ });
464
+
465
+ // every users has 2-10% of events duplicated
466
+ let numDupes = Math.floor(record.length * (integer(2, 10) / 100));
467
+ if (record.length < numDupes) numDupes = record.length - 1;
468
+ // pick random events to duplicate
469
+ const dupesIndexes = [];
470
+ while (dupesIndexes.length < numDupes) {
471
+ const idx = integer(0, record.length - 1);
472
+ if (!dupesIndexes.includes(idx)) {
473
+ dupesIndexes.push(idx);
474
+ }
475
+ }
476
+
477
+ // duplicate those events
478
+ dupesIndexes.forEach(idx => {
479
+ const eventToDup = record[idx];
480
+ const newEvent = { ...eventToDup };
481
+ record.push(newEvent);
482
+ });
483
+ }
484
+
485
+ return record;
486
+ }
487
+ };
488
+
489
+
490
/**
 * Factory for a deeply nested, randomized server-metadata object
 * (server, application, request, response, services).
 *
 * @returns {() => object} generator that builds a fresh metadata object on every call
 */
function generateMetaData() {
    // small local shorthands around chance
    const between = (min, max) => chance.integer({ min, max });
    const describeService = (types, minMajor, maxMajor) => {
        const type = chance.pickone(types);
        const version = `v${between(minMajor, maxMajor)}.${between(0, 10)}`;
        const host = chance.ip();
        return { type, version, host };
    };

    return () => {
        const server = {
            id: chance.guid(),
            location: {
                region: chance.pickone(["us-east-1", "us-west-2", "eu-central-1", "ap-southeast-1"]),
                ip: chance.ip()
            },
            specs: {
                cpu: `${between(2, 64)} vCPUs`,
                memory: `${between(4, 256)} GB`,
                storage: `${between(100, 2000)} GB`
            }
        };

        const application = {
            version: `v${between(1, 5)}.${between(0, 10)}.${between(0, 20)}`,
            // "production" is listed 8 times out of 10 entries
            environment: chance.pickone(["production","production","production","production","production","production","production","production", "staging", "development"])
        };

        const request = {
            id: chance.guid(),
            latency_ms: between(20, 2000),
            endpoint: chance.pickone(["/api/v1/login", "/api/v1/data", "/api/v1/submit", "/api/v1/logout"]),
            method: chance.pickone(["GET", "POST", "PUT", "DELETE"])
        };

        const response = {
            status_code: chance.pickone([200, 200, 200, 201, 400, 401, 403, 404, 500, 502, 503]),
            size_bytes: between(500, 5000)
        };

        const services = {
            database: describeService(["PostgreSQL", "MySQL", "MongoDB", "Redis"], 9, 14),
            cache: describeService(["Redis", "Memcached"], 4, 6),
            message_queue: describeService(["RabbitMQ", "Kafka", "AWS SQS"], 1, 3)
        };

        return { server, application, request, response, services };
    };
}
540
+
541
+
542
/**
 * Builds a random list of hashtags.
 * A pool of 20 candidate tags is rebuilt on every call; tags are then picked
 * from that pool with replacement, so repeats are possible.
 *
 * @returns {string[][]} the picked hashtags wrapped in a single-element array
 */
function makeHashTags() {
    const pool = Array.from({ length: 20 }, () => `#${u.makeName(2, '')}`);

    // the count itself is drawn between two randomly drawn bounds
    const lowerBound = integer(1, 5);
    const upperBound = integer(5, 10);
    const howMany = integer(lowerBound, upperBound);

    const picked = Array.from({ length: howMany }, () => chance.pickone(pool));
    return [picked];
}
555
+
556
+
557
+
558
/**
 * Factory for a random experiment assignment.
 *
 * @returns {() => object[]} generator returning a single-element array holding
 *   { variant, experiment, multi_variate, impression_id }
 */
function designExperiment() {
    const variantChoices = ["A", "B", "C", "Control"];
    const experimentChoices = ["no password", "social sign in", "new tutorial", "new search"];
    const multiVariateChoices = ["A/B", "A/B/C", "A/B/C/D", "Control"];

    return () => [{
        variant: chance.pickone(variantChoices),
        experiment: chance.pickone(experimentChoices),
        multi_variate: chance.pickone(multiVariateChoices),
        impression_id: chance.guid()
    }];
}
580
+
581
/**
 * Factory for a random device description (platform, os, browser, screen, ...).
 *
 * @param {boolean} [isMobile=false] - when true, mobile/tablet platforms and
 *   iOS/Android are added to the candidate lists
 * @returns {() => object} generator that builds a fresh device object on every call
 */
function deviceAttributes(isMobile = false) {
    return function () {
        let devices = ["desktop", "laptop", "desktop", "laptop", "desktop", "laptop", "other"];
        if (isMobile) devices = [...devices, "mobile", "mobile", "mobile", "tablet"];
        const device = chance.pickone(devices);

        let oses = ["Windows", "macOS", "Windows", "macOS", "macOS", "Linux", "Windows", "macOS", "Windows", "macOS", "macOS", "TempleOS"];
        if (isMobile) oses = [...oses, "iOS", "Android", "iOS", "Android"];
        const os = chance.pickone(oses);

        const browser = chance.pickone(["Chrome", "Firefox", "Safari", "Edge", "Opera", "IE", "Brave", "Vivaldi"]);
        const version = chance.integer({ min: 1, max: 15 });
        const resolution = chance.pickone(["1920x1080", "1280x720", "1024x768", "800x600", "640x480"]);
        const language = chance.pickone(["en-US", "en-US", "en-US", "en-GB", "es", "es", "fr", "de", "it", "ja", "zh", "ru"]);

        // Parse "WIDTHxHEIGHT" once, with an explicit radix; the same numbers
        // feed both the screen_* fields and the viewport.
        const [screenWidth, screenHeight] = resolution
            .split('x')
            .map((part) => Number.parseInt(part, 10));

        return {
            platform: device,
            os,
            browser,
            version,
            resolution,
            language,
            linking_method: chance.pickone(["cookie", "localStorage", "fingerprint", "login"]),
            heuristics: {
                timezone_offset: chance.integer({ min: -720, max: 840 }),
                plugins_installed: chance.integer({ min: 0, max: 15 }),
                java_enabled: chance.bool(),
                cookie_enabled: chance.bool(),
                screen_color_depth: chance.pickone([24, 30, 32]),
                screen_width: screenWidth,
                screen_height: screenHeight,
                viewport: {
                    width: screenWidth,
                    height: screenHeight
                }
            }
        };
    };
}
621
+
622
/**
 * Factory for a random shopping cart.
 *
 * Each call of the returned generator builds between 1 and `maxItems` items
 * (count drawn via `integer(1, maxItems)`) and returns a thunk that yields the
 * cart wrapped in a single-element array.
 * NOTE(review): the nested function / [data] wrapping presumably relies on the
 * consumer resolving functions and picking from arrays — confirm before changing.
 *
 * @param {number} [maxItems=50] - upper bound passed to `integer` for the item count
 * @returns {() => () => object[][]} cart generator
 */
function makeProducts(maxItems = 50) {
    return function () {
        const categories = ["electronics", "books", "clothing", "home", "garden", "toys", "sports", "automotive", "beauty", "health", "grocery", "jewelry", "shoes", "tools", "office supplies"];
        const descriptors = ["brand new", "open box", "refurbished", "used", "like new", "vintage", "antique", "collectible"];
        const suffix = ["item", "product", "good", "merchandise", "thing", "object", "widget", "gadget", "device", "apparatus", "contraption", "instrument", "tool", "implement", "utensil", "appliance", "machine", "equipment", "gear", "kit", "set", "package"];
        const assetPreview = ['.png', '.jpg', '.jpeg', '.heic', '.mp4', '.mov', '.avi'];
        const data = [];
        const numOfItems = integer(1, maxItems);

        // block-scoped counter (was a function-scoped `var`)
        for (let i = 0; i < numOfItems; i++) {
            const category = chance.pickone(categories);
            const descriptor = chance.pickone(descriptors);
            const suffixWord = chance.pickone(suffix);
            // e.g. "open box" + "widget" -> "open-box-widget"
            const slug = `${descriptor.replace(/\s+/g, '-').toLowerCase()}-${suffixWord.replace(/\s+/g, '-').toLowerCase()}`;
            const asset = chance.pickone(assetPreview);

            const price = integer(1, 100);
            const quantity = integer(1, 5);

            data.push({
                amount: price,
                quantity: quantity,
                total_value: price * quantity,
                featured: chance.pickone([true, false, false]),
                category: category,
                descriptor: descriptor,
                slug: slug,
                assetPreview: `https://example.com/assets/${slug}${asset}`,
                assetType: asset
            });
        }

        return () => [data];
    };
}
663
+
664
+
665
+
666
+
667
+
668
+ export default config;