make-mp-data 3.0.4 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -0
- package/dungeons/array-of-object-lookup-schema.json +327 -0
- package/dungeons/array-of-object-lookup.js +28 -8
- package/dungeons/capstone/capstone-ic3.js +291 -0
- package/dungeons/capstone/capstone-ic4.js +598 -0
- package/dungeons/capstone/capstone-ic5.js +668 -0
- package/dungeons/capstone/generate-product-lookup.js +309 -0
- package/dungeons/ecommerce-schema.json +462 -0
- package/dungeons/{copilot.js → ecommerce.js} +77 -15
- package/dungeons/education-schema.json +2409 -0
- package/dungeons/education.js +206 -442
- package/dungeons/fintech-schema.json +14034 -0
- package/dungeons/fintech.js +110 -389
- package/dungeons/foobar-schema.json +403 -0
- package/dungeons/foobar.js +27 -4
- package/dungeons/food-delivery-schema.json +192 -0
- package/dungeons/food-delivery.js +602 -0
- package/dungeons/food-schema.json +1152 -0
- package/dungeons/food.js +150 -383
- package/dungeons/gaming-schema.json +1270 -0
- package/dungeons/gaming.js +143 -3
- package/dungeons/insurance-application-schema.json +204 -0
- package/dungeons/insurance-application.js +605 -0
- package/dungeons/media-schema.json +906 -0
- package/dungeons/media.js +221 -391
- package/dungeons/retention-cadence-schema.json +78 -0
- package/dungeons/retention-cadence.js +35 -1
- package/dungeons/rpg-schema.json +4526 -0
- package/dungeons/rpg.js +130 -388
- package/dungeons/sanity-schema.json +255 -0
- package/dungeons/sanity.js +21 -10
- package/dungeons/sass-schema.json +1291 -0
- package/dungeons/sass.js +210 -337
- package/dungeons/scd-schema.json +919 -0
- package/dungeons/scd.js +38 -10
- package/dungeons/simple-schema.json +608 -0
- package/dungeons/simple.js +48 -11
- package/dungeons/simplest-schema.json +1418 -0
- package/dungeons/simplest.js +392 -0
- package/dungeons/social-schema.json +1118 -0
- package/dungeons/social.js +124 -365
- package/dungeons/text-generation-schema.json +3096 -0
- package/dungeons/text-generation.js +71 -0
- package/index.js +6 -3
- package/lib/core/config-validator.js +18 -0
- package/lib/core/storage.js +5 -5
- package/lib/generators/events.js +4 -4
- package/lib/orchestrators/mixpanel-sender.js +12 -7
- package/lib/orchestrators/user-loop.js +14 -6
- package/lib/templates/soup-presets.js +188 -0
- package/lib/utils/utils.js +52 -6
- package/package.json +1 -1
- package/types.d.ts +20 -3
- package/dungeons/adspend.js +0 -117
- package/dungeons/anon.js +0 -128
- package/dungeons/benchmark-heavy.js +0 -240
- package/dungeons/benchmark-light.js +0 -126
- package/dungeons/big.js +0 -226
- package/dungeons/business.js +0 -391
- package/dungeons/complex.js +0 -428
- package/dungeons/experiments.js +0 -137
- package/dungeons/funnels.js +0 -309
- package/dungeons/mil.js +0 -323
- package/dungeons/mirror.js +0 -160
- package/dungeons/soup-test.js +0 -52
- package/dungeons/streaming.js +0 -372
- package/dungeons/strict-event-test.js +0 -30
- package/dungeons/student-teacher.js +0 -438
- package/dungeons/too-big-events.js +0 -203
- package/dungeons/user-agent.js +0 -209
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ═══════════════════════════════════════════════════════════════
|
|
3
|
+
* DATASET OVERVIEW
|
|
4
|
+
* ═══════════════════════════════════════════════════════════════
|
|
5
|
+
*
|
|
6
|
+
* IC4 Capstone — enterprise SaaS dataset for Mixpanel certification.
|
|
7
|
+
* - Complex events with deeply nested metadata (server, request, response)
|
|
8
|
+
* - Text generation for support tickets and social posts
|
|
9
|
+
* - Session computation written to external JSONL file
|
|
10
|
+
* - Key name remapping (event→action, time→occured_at, etc.)
|
|
11
|
+
*
|
|
12
|
+
* ═══════════════════════════════════════════════════════════════
|
|
13
|
+
* ANALYTICS HOOKS (1 pattern — everything hook)
|
|
14
|
+
* ═══════════════════════════════════════════════════════════════
|
|
15
|
+
*
|
|
16
|
+
* 1. DATA TRANSFORMATION + DUPLICATION (everything hook)
|
|
17
|
+
* - Renames keys: event→action, time→occured_at, device_id→client_id, user_id→auth_uuid
|
|
18
|
+
* - Computes session aggregates and writes to external file
|
|
19
|
+
* - Removes insert_id and session_id from events
|
|
20
|
+
* - Duplicates ~3-7 random events per user
|
|
21
|
+
* Students must work with non-standard key names and handle duplicates.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import Chance from 'chance';
|
|
25
|
+
const chance = new Chance();
|
|
26
|
+
import { weighNumRange, date, integer } from "../brain/utils/utils.js";
|
|
27
|
+
import * as u from 'ak-tools';
|
|
28
|
+
import fs from 'fs';
|
|
29
|
+
import path from 'path';
|
|
30
|
+
import { fileURLToPath } from 'url';
|
|
31
|
+
|
|
32
|
+
import { createTextGenerator, generateBatch } from "../brain/generators/text.js";
|
|
33
|
+
import { PHRASE_BANK } from "../brain/templates/phrases.js";
|
|
34
|
+
|
|
35
|
+
// resolve this module's own directory (ES modules have no built-in __dirname) to locate the sessions output file
|
|
36
|
+
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const sessionsFile = path.join(__dirname, '../data/generated_sessions.jsonl');

// Make sure the sessions file (JSON Lines format) exists before the hook appends to it.
// writeFileSync does not create missing parent directories, so the directory is
// created first; otherwise importing this module throws ENOENT when ../data is absent.
fs.mkdirSync(path.dirname(sessionsFile), { recursive: true });
if (!fs.existsSync(sessionsFile)) {
	fs.writeFileSync(sessionsFile, '', 'utf8');
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
const enterpriseSupportGen = createTextGenerator({
|
|
47
|
+
style: "support",
|
|
48
|
+
tone: "neg",
|
|
49
|
+
intensity: "high",
|
|
50
|
+
formality: "technical",
|
|
51
|
+
keywords: {
|
|
52
|
+
features: ['Dashboard Analytics', 'Export API', 'SSO Login', 'Admin Console', 'User Management', 'SAML Authentication', 'OAuth Integration', 'Multi-Factor Auth', 'Role-Based Access Control', 'Audit Logs', 'Webhook Configuration', 'Data Pipeline', 'Custom Reports', 'Scheduled Exports', 'Real-Time Sync', 'Bulk Import', 'API Rate Limits', 'Usage Metrics', 'Team Permissions', 'Single Sign-On', 'LDAP Integration', 'Data Retention Policies', 'Custom Domains', 'White-Label Branding', 'Advanced Filtering', 'Query Builder', 'Notification Rules', 'Email Templates', 'Workflow Automation', 'Integration Hub', 'Data Validation', 'Field Mapping', 'Schema Management', 'Version Control', 'Rollback Capability', 'Disaster Recovery', 'High Availability', 'Load Balancing', 'Auto-Scaling', 'Performance Monitoring'],
|
|
53
|
+
products: ['DataViz Pro', 'Enterprise Suite', 'v3.2.1', 'v2.8.4', 'Analytics Platform', 'Cloud Dashboard', 'v4.1.0', 'v3.9.2', 'Enterprise Edition', 'Professional Tier', 'Business Intelligence Module', 'Data Warehouse Connector', 'Mobile SDK', 'REST API v2', 'GraphQL Endpoint', 'Legacy Platform', 'Next-Gen Analytics', 'Premium Package', 'Ultimate Plan', 'Advanced Analytics Suite', 'Reporting Engine', 'Data Integration Platform', 'ETL Pipeline', 'Real-Time Analytics', 'Batch Processing Module', 'Stream Processing', 'Visualization Library', 'Dashboard Builder', 'Report Designer'],
|
|
54
|
+
technical: ['CORS error', 'timeout', 'memory leak', 'authentication', 'database', 'connection pooling', 'rate limiting', 'SSL handshake', 'DNS resolution', 'load balancer', 'cache invalidation', 'session timeout', 'deadlock detection', 'query optimization', 'index fragmentation', 'replication lag', 'connection refused', 'network latency', 'packet loss', 'firewall rules', 'proxy configuration', 'certificate expiration', 'API throttling', 'websocket disconnection', 'redis cluster', 'kafka consumer lag', 'database migration', 'schema validation', 'token refresh', 'OAuth flow', 'SAML assertion', 'JWT validation', 'API Gateway', 'reverse proxy', 'CDN distribution', 'geo-replication', 'data consistency', 'eventual consistency', 'distributed transactions', 'circuit breaker', 'retry logic', 'exponential backoff', 'service mesh', 'container orchestration', 'pod scheduling', 'horizontal scaling', 'vertical scaling', 'auto-discovery', 'health checks', 'graceful shutdown'],
|
|
55
|
+
errors: ['ERR_CONNECTION_REFUSED', '500 Internal Server', 'TIMEOUT_ERROR', 'AUTH_FAILED', '502 Bad Gateway', '503 Service Unavailable', '504 Gateway Timeout', '401 Unauthorized', '403 Forbidden', '429 Too Many Requests', 'ECONNRESET', 'ETIMEDOUT', 'ERR_SSL_PROTOCOL', 'DATABASE_CONNECTION_FAILED', 'INVALID_TOKEN', 'SESSION_EXPIRED', 'QUOTA_EXCEEDED', 'CORS_POLICY_VIOLATION', 'VALIDATION_ERROR', 'PERMISSION_DENIED', 'RESOURCE_NOT_FOUND', 'DUPLICATE_ENTRY', 'CONSTRAINT_VIOLATION', 'OUT_OF_MEMORY', 'DISK_FULL', 'MAX_CONNECTIONS_REACHED', 'DEADLOCK_DETECTED', 'TRANSACTION_ROLLBACK', 'REPLICATION_ERROR', 'CLUSTER_SPLIT_BRAIN', 'FAILOVER_TRIGGERED', 'BACKUP_FAILED', 'RESTORE_ERROR', 'MIGRATION_FAILED', 'SCHEMA_MISMATCH', 'VERSION_CONFLICT'],
|
|
56
|
+
competitors: ['Tableau', 'PowerBI', 'Looker', 'Qlik', 'Domo', 'Sisense', 'ThoughtSpot', 'Mode Analytics', 'Metabase', 'Redash', 'Chartio', 'Periscope Data', 'Google Data Studio', 'Amazon QuickSight', 'IBM Cognos', 'SAP Analytics', 'Oracle BI', 'MicroStrategy', 'Yellowfin', 'Birst', 'Alteryx', 'Dataiku', 'Databricks', 'Snowflake', 'BigQuery', 'Redshift', 'Azure Synapse', 'Splunk', 'New Relic', 'Datadog']
|
|
57
|
+
},
|
|
58
|
+
mixedSentiment: true,
|
|
59
|
+
authenticityLevel: 0.7,
|
|
60
|
+
typos: true,
|
|
61
|
+
typoRate: 0.02,
|
|
62
|
+
specificityLevel: 0.8,
|
|
63
|
+
min: 80,
|
|
64
|
+
max: 254,
|
|
65
|
+
includeMetadata: false,
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
/** @type {import('../types.js').Dungeon} */
|
|
70
|
+
const config = {
|
|
71
|
+
token: "",
|
|
72
|
+
seed: "its the capstone baby!",
|
|
73
|
+
name: "ic4-capstone",
|
|
74
|
+
numDays: 180, //how many days worth of data
|
|
75
|
+
numEvents: 5_000_000, //how many events
|
|
76
|
+
numUsers: 25_000, //how many users
|
|
77
|
+
format: 'json', //csv or json
|
|
78
|
+
region: "US",
|
|
79
|
+
gzip: true,
|
|
80
|
+
hasAnonIds: true, //if true, anonymousIds are created for each user
|
|
81
|
+
hasSessionIds: true, //if true, hasSessionIds are created for each user
|
|
82
|
+
|
|
83
|
+
hasLocation: true,
|
|
84
|
+
hasAndroidDevices: false,
|
|
85
|
+
hasIOSDevices: false,
|
|
86
|
+
hasDesktopDevices: false,
|
|
87
|
+
hasBrowser: false,
|
|
88
|
+
hasCampaigns: false,
|
|
89
|
+
isAnonymous: false,
|
|
90
|
+
hasAdSpend: true,
|
|
91
|
+
|
|
92
|
+
hasAvatar: true,
|
|
93
|
+
makeChart: false,
|
|
94
|
+
|
|
95
|
+
batchSize: 250_000,
|
|
96
|
+
concurrency: 1,
|
|
97
|
+
writeToDisk: true,
|
|
98
|
+
|
|
99
|
+
funnels: [],
|
|
100
|
+
events: [
|
|
101
|
+
{
|
|
102
|
+
"event": "checkout",
|
|
103
|
+
"weight": 2,
|
|
104
|
+
"properties": {
|
|
105
|
+
amount: weighNumRange(5, 500, .25),
|
|
106
|
+
currency: ["USD", "USD", "USD", "CAD", "EUR", "EUR", "BTC", "BTC", "ETH", "JPY"],
|
|
107
|
+
cart: makeProducts(),
|
|
108
|
+
}
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
"event": "add to cart",
|
|
112
|
+
"weight": 4,
|
|
113
|
+
"properties": {
|
|
114
|
+
amount: weighNumRange(5, 500, .25),
|
|
115
|
+
qty: integer(1, 5),
|
|
116
|
+
product_id: weighNumRange(1, 10000, 1.4)
|
|
117
|
+
}
|
|
118
|
+
},
|
|
119
|
+
{
|
|
120
|
+
"event": "page view",
|
|
121
|
+
"weight": 10,
|
|
122
|
+
"properties": {
|
|
123
|
+
page: ["/", "/", "/", "/learn-more", "/pricing", "/contact", "/about", "/careers", "/sign-up", "/login", "/app", "/app", "/app", "/app"],
|
|
124
|
+
utm_source: ["$organic", "$organic", "$organic", "$organic", "google", "google", "google", "facebook", "facebook", "twitter", "linkedin"],
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
"event": "watch video",
|
|
129
|
+
"weight": 8,
|
|
130
|
+
"properties": {
|
|
131
|
+
category: ["funny", "educational", "inspirational", "music", "news", "sports", "cooking", "DIY", "travel", "gaming"],
|
|
132
|
+
hashTags: makeHashTags,
|
|
133
|
+
watchTimeSec: weighNumRange(10, 600, .25,),
|
|
134
|
+
quality: ["2160p", "1440p", "1080p", "720p", "480p", "360p", "240p"],
|
|
135
|
+
format: ["mp4", "avi", "mov", "mpg"],
|
|
136
|
+
video_id: weighNumRange(1, 50000, 1.4),
|
|
137
|
+
|
|
138
|
+
}
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"event": "comment",
|
|
142
|
+
"weight": 2,
|
|
143
|
+
"properties": {
|
|
144
|
+
length: weighNumRange(1, 500, .25),
|
|
145
|
+
video_id: weighNumRange(1, 50000, 1.4),
|
|
146
|
+
has_replies: [true, false, false, false, false],
|
|
147
|
+
has_photo: [true, false, false, false, false],
|
|
148
|
+
|
|
149
|
+
}
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
"event": "save video",
|
|
153
|
+
"weight": 4,
|
|
154
|
+
"properties": {
|
|
155
|
+
video_id: weighNumRange(1, 50000, 1.4),
|
|
156
|
+
ui_control: ["toolbar", "menu", "keyboard"]
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
}
|
|
160
|
+
},
|
|
161
|
+
{
|
|
162
|
+
"event": "view item",
|
|
163
|
+
"weight": 8,
|
|
164
|
+
"properties": {
|
|
165
|
+
product_id: weighNumRange(1, 10000, 3),
|
|
166
|
+
colors: ["light", "dark", "custom", "dark"]
|
|
167
|
+
}
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
"event": "save item",
|
|
171
|
+
"weight": 5,
|
|
172
|
+
"properties": {
|
|
173
|
+
product_id: weighNumRange(1, 10000, 12),
|
|
174
|
+
colors: ["light", "dark", "custom", "dark"]
|
|
175
|
+
}
|
|
176
|
+
},
|
|
177
|
+
{
|
|
178
|
+
"event": "support ticket",
|
|
179
|
+
"weight": 2,
|
|
180
|
+
"properties": {
|
|
181
|
+
severity: ["low", "medium", "high"],
|
|
182
|
+
ticket_id: chance.guid.bind(chance),
|
|
183
|
+
ticket_text: () => enterpriseSupportGen.generateOne()
|
|
184
|
+
}
|
|
185
|
+
},
|
|
186
|
+
{
|
|
187
|
+
"event": "sign up",
|
|
188
|
+
"isFirstEvent": true,
|
|
189
|
+
"weight": 0,
|
|
190
|
+
"properties": {
|
|
191
|
+
plan: ["free", "free", "free", "free", "basic", "basic", "basic", "premium", "premium", "enterprise"],
|
|
192
|
+
dateOfRenewal: date(100, false),
|
|
193
|
+
codewords: u.makeName,
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
],
|
|
197
|
+
superProps: {
|
|
198
|
+
linked_device: deviceAttributes(),
|
|
199
|
+
server_metaData: generateMetaData()
|
|
200
|
+
},
|
|
201
|
+
/*
|
|
202
|
+
user properties work the same as event properties
|
|
203
|
+
each key should be an array or function reference
|
|
204
|
+
*/
|
|
205
|
+
userProps: {
|
|
206
|
+
title: chance.profession.bind(chance),
|
|
207
|
+
luckyNumber: weighNumRange(42, 420),
|
|
208
|
+
experiment: designExperiment(),
|
|
209
|
+
spiritAnimal: ["unicorn", "dragon", "phoenix", "sasquatch", "yeti", "kraken", "jackalope", "thunderbird", "mothman", "nessie", "chupacabra", "jersey devil", "bigfoot", "weindgo", "bunyip", "mokele-mbembe", "tatzelwurm", "megalodon"],
|
|
210
|
+
timezone: chance.timezone.bind(chance), // ["America/New_York", "America/Los_Angeles", "America/Chicago", "America/Denver", "America/Phoenix", "America/Anchorage", "Pacific/Honolulu"]
|
|
211
|
+
ip: chance.ip.bind(chance),
|
|
212
|
+
// lastCart: makeProducts(5),
|
|
213
|
+
|
|
214
|
+
},
|
|
215
|
+
|
|
216
|
+
/** each generates its own table */
|
|
217
|
+
scdProps: {
|
|
218
|
+
role: {
|
|
219
|
+
type: "user",
|
|
220
|
+
frequency: "week",
|
|
221
|
+
values: ["admin", "collaborator", "user", "view only", "no access"],
|
|
222
|
+
timing: 'fuzzy',
|
|
223
|
+
max: 10
|
|
224
|
+
},
|
|
225
|
+
NPS: {
|
|
226
|
+
type: "user",
|
|
227
|
+
frequency: "day",
|
|
228
|
+
values: weighNumRange(1, 10, 2, 150),
|
|
229
|
+
timing: 'fuzzy',
|
|
230
|
+
max: 10
|
|
231
|
+
},
|
|
232
|
+
// MRR: {
|
|
233
|
+
// type: "company_id",
|
|
234
|
+
// frequency: "month",
|
|
235
|
+
// values: weighNumRange(0, 10000, .15),
|
|
236
|
+
// timing: 'fixed',
|
|
237
|
+
// max: 10
|
|
238
|
+
// },
|
|
239
|
+
// AccountHealthScore: {
|
|
240
|
+
// type: "company_id",
|
|
241
|
+
// frequency: "week",
|
|
242
|
+
// values: weighNumRange(1, 10, .15),
|
|
243
|
+
// timing: 'fixed',
|
|
244
|
+
// max: 40
|
|
245
|
+
// },
|
|
246
|
+
// plan: {
|
|
247
|
+
// type: "company_id",
|
|
248
|
+
// frequency: "month",
|
|
249
|
+
// values: ["free", "basic", "premium", "enterprise"],
|
|
250
|
+
// timing: 'fixed',
|
|
251
|
+
// max: 10
|
|
252
|
+
// }
|
|
253
|
+
},
|
|
254
|
+
|
|
255
|
+
mirrorProps: {
|
|
256
|
+
},
|
|
257
|
+
|
|
258
|
+
/*
|
|
259
|
+
for group analytics keys, we need an array of arrays [[],[],[]]
|
|
260
|
+
each pair represents a group_key and the number of profiles for that key
|
|
261
|
+
*/
|
|
262
|
+
// groupKeys: [
|
|
263
|
+
// ['company_id', 500, []],
|
|
264
|
+
// ['room_id', 10000, ["save video", "comment", "watch video"]],
|
|
265
|
+
|
|
266
|
+
// ],
|
|
267
|
+
// groupProps: {
|
|
268
|
+
// company_id: {
|
|
269
|
+
// name: () => { return chance.company(); },
|
|
270
|
+
// email: () => { return `CSM: ${chance.pickone(["AK", "Jessica", "Michelle", "Dana", "Brian", "Dave"])}`; },
|
|
271
|
+
// "# of employees": weighNumRange(3, 10000),
|
|
272
|
+
// "industry": ["tech", "finance", "healthcare", "education", "government", "non-profit"],
|
|
273
|
+
// "segment": ["enterprise", "SMB", "mid-market"],
|
|
274
|
+
// "products": [["core"], ["core"], ["core", "add-ons"], ["core", "pro-serve"], ["core", "add-ons", "pro-serve"], ["core", "BAA", "enterprise"], ["free"], ["free"], ["free", "addons"]],
|
|
275
|
+
// },
|
|
276
|
+
// room_id: {
|
|
277
|
+
// name: () => { return `#${chance.word({ length: integer(4, 24), capitalize: true })}`; },
|
|
278
|
+
// email: ["public", "private"],
|
|
279
|
+
// "room provider": ["partner", "core", "core", "core"],
|
|
280
|
+
// "room capacity": weighNumRange(3, 1000000),
|
|
281
|
+
// "isPublic": [true, false, false, false, false],
|
|
282
|
+
// "country": chance.country.bind(chance),
|
|
283
|
+
// "isVerified": [true, true, false, false, false],
|
|
284
|
+
// }
|
|
285
|
+
// },
|
|
286
|
+
// groupEvents: [{
|
|
287
|
+
// attribute_to_user: false,
|
|
288
|
+
// event: "card charged",
|
|
289
|
+
// weight: 1,
|
|
290
|
+
// frequency: 30,
|
|
291
|
+
// group_key: "company_id",
|
|
292
|
+
// group_size: 500,
|
|
293
|
+
// properties: {
|
|
294
|
+
// amount: weighNumRange(5, 500, .25),
|
|
295
|
+
// currency: ["USD", "USD", "USD", "CAD", "EUR", "EUR", "BTC", "BTC", "ETH", "JPY"],
|
|
296
|
+
// plan: ["basic", "premium", "enterprise"],
|
|
297
|
+
// "payment method": []
|
|
298
|
+
// }
|
|
299
|
+
// }],
|
|
300
|
+
|
|
301
|
+
lookupTables: [
|
|
302
|
+
{
|
|
303
|
+
key: "video_id",
|
|
304
|
+
entries: 50000,
|
|
305
|
+
attributes: {
|
|
306
|
+
isFlagged: [true, false, false, false, false],
|
|
307
|
+
copyright: ["all rights reserved", "creative commons", "creative commons", "public domain", "fair use"],
|
|
308
|
+
uploader_id: chance.guid.bind(chance),
|
|
309
|
+
"uploader influence": ["low", "low", "low", "medium", "medium", "high"],
|
|
310
|
+
thumbs: weighNumRange(0, 35),
|
|
311
|
+
rating: ["G", "PG", "PG-13", "R", "NC-17", "PG-13", "R", "NC-17", "R", "PG", "PG"]
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
}
|
|
315
|
+
],
|
|
316
|
+
|
|
317
|
+
hook: function (record, type, meta) {
|
|
318
|
+
|
|
319
|
+
if (type === "everything") {
|
|
320
|
+
const changeKeyNames = [
|
|
321
|
+
["event", "action"],
|
|
322
|
+
["time", "occured_at"],
|
|
323
|
+
["device_id", "client_id"],
|
|
324
|
+
["user_id", "auth_uuid"]
|
|
325
|
+
]
|
|
326
|
+
|
|
327
|
+
const keysToRemove = [
|
|
328
|
+
"insert_id",
|
|
329
|
+
"session_id"
|
|
330
|
+
]
|
|
331
|
+
|
|
332
|
+
record.forEach(event => {
|
|
333
|
+
// change key names
|
|
334
|
+
changeKeyNames.forEach(pair => {
|
|
335
|
+
const [oldKey, newKey] = pair;
|
|
336
|
+
if (event.hasOwnProperty(oldKey)) {
|
|
337
|
+
event[newKey] = event[oldKey];
|
|
338
|
+
delete event[oldKey];
|
|
339
|
+
}
|
|
340
|
+
});
|
|
341
|
+
});
|
|
342
|
+
|
|
343
|
+
// get all unique session_id
|
|
344
|
+
const uniqueSessionIds = new Set();
|
|
345
|
+
record.forEach(event => {
|
|
346
|
+
if (event.hasOwnProperty("session_id")) {
|
|
347
|
+
uniqueSessionIds.add(event["session_id"]);
|
|
348
|
+
}
|
|
349
|
+
});
|
|
350
|
+
|
|
351
|
+
// compute the length of all sessions
|
|
352
|
+
const sessions = [];
|
|
353
|
+
uniqueSessionIds.forEach(sessId => {
|
|
354
|
+
const sessEvents = record.filter(event => event["session_id"] === sessId);
|
|
355
|
+
sessions.push({
|
|
356
|
+
session_id: sessId,
|
|
357
|
+
event_count: sessEvents.length,
|
|
358
|
+
start_time: sessEvents.reduce((min, p) => p.occured_at < min ? p.occured_at : min, sessEvents[0].occured_at),
|
|
359
|
+
end_time: sessEvents.reduce((max, p) => p.occured_at > max ? p.occured_at : max, sessEvents[0].occured_at),
|
|
360
|
+
uuid: meta.profile.distinct_id
|
|
361
|
+
});
|
|
362
|
+
});
|
|
363
|
+
|
|
364
|
+
// append sessions to the file
|
|
365
|
+
if (sessions.length > 0) {
|
|
366
|
+
const sessionLines = sessions.map(s => JSON.stringify(s)).join('\n') + '\n';
|
|
367
|
+
fs.appendFileSync(sessionsFile, sessionLines, 'utf8');
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
// remove unwanted keys
|
|
371
|
+
record.forEach(event => {
|
|
372
|
+
keysToRemove.forEach(key => {
|
|
373
|
+
if (event.hasOwnProperty(key)) {
|
|
374
|
+
delete event[key];
|
|
375
|
+
}
|
|
376
|
+
});
|
|
377
|
+
});
|
|
378
|
+
|
|
379
|
+
// every users has ~5 duplicate events
|
|
380
|
+
let numDupes = integer(3, 7);
|
|
381
|
+
if (record.length < numDupes) numDupes = record.length - 1;
|
|
382
|
+
// pick random events to duplicate
|
|
383
|
+
const dupesIndexes = [];
|
|
384
|
+
while (dupesIndexes.length < numDupes) {
|
|
385
|
+
const idx = integer(0, record.length - 1);
|
|
386
|
+
if (!dupesIndexes.includes(idx)) {
|
|
387
|
+
dupesIndexes.push(idx);
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
// duplicate those events
|
|
392
|
+
dupesIndexes.forEach(idx => {
|
|
393
|
+
const eventToDup = record[idx];
|
|
394
|
+
const newEvent = { ...eventToDup };
|
|
395
|
+
record.push(newEvent);
|
|
396
|
+
});
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
return record;
|
|
400
|
+
}
|
|
401
|
+
};
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
/**
 * Builds a factory for deeply nested, randomly populated server metadata.
 *
 * @returns {() => object} function that returns a fresh object with
 *   server, application, request, response and services sections on every call
 */
function generateMetaData() {
	const intBetween = (min, max) => chance.integer({ min, max });

	// one backing service: random type, "vMAJOR.MINOR" version string, random host ip
	const describeService = (types, minMajor, maxMajor) => ({
		type: chance.pickone(types),
		version: `v${intBetween(minMajor, maxMajor)}.${intBetween(0, 10)}`,
		host: chance.ip()
	});

	return () => {
		// sections are built in the same order they appear in the result
		const server = {
			id: chance.guid(),
			location: {
				region: chance.pickone(["us-east-1", "us-west-2", "eu-central-1", "ap-southeast-1"]),
				ip: chance.ip()
			},
			specs: {
				cpu: `${intBetween(2, 64)} vCPUs`,
				memory: `${intBetween(4, 256)} GB`,
				storage: `${intBetween(100, 2000)} GB`
			}
		};

		const application = {
			version: `v${intBetween(1, 5)}.${intBetween(0, 10)}.${intBetween(0, 20)}`,
			// "production" appears 8 times out of 10 entries
			environment: chance.pickone(["production", "production", "production", "production", "production", "production", "production", "production", "staging", "development"])
		};

		const request = {
			id: chance.guid(),
			latency_ms: intBetween(20, 2000),
			endpoint: chance.pickone(["/api/v1/login", "/api/v1/data", "/api/v1/submit", "/api/v1/logout"]),
			method: chance.pickone(["GET", "POST", "PUT", "DELETE"])
		};

		const response = {
			status_code: chance.pickone([200, 200, 200, 201, 400, 401, 403, 404, 500, 502, 503]),
			size_bytes: intBetween(500, 5000)
		};

		const services = {
			database: describeService(["PostgreSQL", "MySQL", "MongoDB", "Redis"], 9, 14),
			cache: describeService(["Redis", "Memcached"], 4, 6),
			message_queue: describeService(["RabbitMQ", "Kafka", "AWS SQS"], 1, 3)
		};

		return { server, application, request, response, services };
	};
}
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
/**
 * Produces a random list of hashtags.
 *
 * A fresh pool of 20 candidate tags is generated on each call, then a random
 * number of tags is drawn from that pool.
 *
 * @returns {string[][]} the drawn hashtags, wrapped in an outer array
 */
function makeHashTags() {
	const tagPool = Array.from({ length: 20 }, () => '#' + u.makeName(2, ''));

	// both bounds of the count are themselves random: integer(1, 5) and integer(5, 10)
	const lowerBound = integer(1, 5);
	const upperBound = integer(5, 10);
	const wanted = integer(lowerBound, upperBound);

	// draws are independent, so the same tag may be picked more than once
	const picked = [];
	while (picked.length < wanted) {
		picked.push(chance.pickone(tagPool));
	}

	return [picked];
}
|
|
469
|
+
|
|
470
|
+
// Generate realistic product names from catalog
|
|
471
|
+
/**
 * Picks a product name from PHRASE_BANK.store_products, sometimes with a suffix.
 *
 * All values of store_products are flattened into one list and one entry is
 * chosen at random. With a likelihood of 30, a colour, size, tier or model
 * suffix is appended.
 *
 * @returns {string} the product name, with or without a variation suffix
 */
function getProductName() {
	const catalog = Object.values(PHRASE_BANK.store_products).flat();
	const picked = chance.pickone(catalog);

	// most of the time the bare catalog name is returned
	if (!chance.bool({ likelihood: 30 })) {
		return picked;
	}

	// every candidate suffix is generated up front, then one of them is chosen
	const colour = chance.pickone(['Black', 'White', 'Blue', 'Red', 'Gray', 'Silver', 'Gold']);
	const size = chance.pickone(['Small', 'Medium', 'Large', 'X-Large']);
	const tier = chance.pickone(['Pro', 'Plus', 'Lite', 'Max', 'Mini']);
	const model = chance.pickone(['2024 Model', '2025 Edition', 'Gen 2']);
	const suffixes = [` - ${colour}`, ` - ${size}`, ` - ${tier}`, ` (${model})`];

	return picked + chance.pickone(suffixes);
}
|
|
488
|
+
|
|
489
|
+
// Get category for a product
|
|
490
|
+
/**
 * Picks one product category at random from a fixed list of twelve.
 *
 * @returns {string} a category label such as "Electronics" or "Pet Supplies"
 */
function getProductCategory() {
	return chance.pickone([
		"Electronics",
		"Home & Kitchen",
		"Clothing",
		"Beauty & Personal Care",
		"Sports & Outdoors",
		"Books & Media",
		"Toys & Games",
		"Pet Supplies",
		"Office Supplies",
		"Baby Products",
		"Automotive",
		"Garden & Outdoor"
	]);
}
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
/**
 * Builds a factory for random experiment assignments.
 *
 * @returns {() => object[]} function that returns a one-element array holding
 *   { variant, experiment, multi_variate, impression_id }
 */
function designExperiment() {
	const variantOptions = ["A", "B", "C", "Control"];
	const experimentOptions = ["no password", "social sign in", "new tutorial", "new search"];
	const multiVariateOptions = ["A/B", "A/B/C", "A/B/C/D", "Control"];

	// property order below is also the order in which the random draws happen
	return () => [{
		variant: chance.pickone(variantOptions),
		experiment: chance.pickone(experimentOptions),
		multi_variate: chance.pickone(multiVariateOptions),
		impression_id: chance.guid()
	}];
}
|
|
524
|
+
|
|
525
|
+
/**
 * Builds a factory for random device descriptions.
 *
 * @param {boolean} [isMobile=false] - when true, mobile/tablet platforms and
 *   iOS/Android are added to the candidate lists
 * @returns {() => object} function that returns
 *   { platform, os, browser, version, resolution, language }
 */
function deviceAttributes(isMobile = false) {
	const desktopPlatforms = ["desktop", "laptop", "desktop", "laptop", "desktop", "laptop", "other"];
	const desktopSystems = ["Windows", "macOS", "Windows", "macOS", "macOS", "Linux", "Windows", "macOS", "Windows", "macOS", "macOS", "TempleOS"];

	// repeated entries act as weights for pickone
	const platforms = isMobile
		? [...desktopPlatforms, "mobile", "mobile", "mobile", "tablet"]
		: desktopPlatforms;
	const systems = isMobile
		? [...desktopSystems, "iOS", "Android", "iOS", "Android"]
		: desktopSystems;

	// property order below is also the order in which the random draws happen
	return () => ({
		platform: chance.pickone(platforms),
		os: chance.pickone(systems),
		browser: chance.pickone(["Chrome", "Firefox", "Safari", "Edge", "Opera", "IE", "Brave", "Vivaldi"]),
		version: chance.integer({ min: 1, max: 15 }),
		resolution: chance.pickone(["1920x1080", "1280x720", "1024x768", "800x600", "640x480"]),
		language: chance.pickone(["en-US", "en-US", "en-US", "en-GB", "es", "es", "fr", "de", "it", "ja", "zh", "ru"])
	});
}
|
|
551
|
+
|
|
552
|
+
/**
 * Builds a factory for random shopping carts.
 *
 * @param {number} [maxItems=10] - upper bound passed to integer() for the cart size
 * @returns {Function} function that builds a cart and returns another function,
 *   which in turn returns the cart wrapped in an outer array
 */
function makeProducts(maxItems = 10) {
	const categoryOptions = ["electronics", "books", "clothing", "home", "garden", "toys", "sports", "automotive", "beauty", "health", "grocery", "jewelry", "shoes", "tools", "office supplies"];
	const descriptorOptions = ["brand new", "open box", "refurbished", "used", "like new", "vintage", "antique", "collectible"];
	const suffixOptions = ["item", "product", "good", "merchandise", "thing", "object", "widget", "gadget", "device", "apparatus", "contraption", "instrument", "tool", "implement", "utensil", "appliance", "machine", "equipment", "gear", "kit", "set", "package"];
	const assetExtensions = ['.png', '.jpg', '.jpeg', '.heic', '.mp4', '.mov', '.avi'];

	// lower-case and replace each run of whitespace with a single hyphen
	const slugify = (text) => text.replace(/\s+/g, '-').toLowerCase();

	return () => {
		const cart = [];
		const itemCount = integer(1, maxItems);

		for (let n = 0; n < itemCount; n++) {
			// draws happen in this order: category, descriptor, suffix, asset, price, quantity, featured
			const category = chance.pickone(categoryOptions);
			const descriptor = chance.pickone(descriptorOptions);
			const suffixWord = chance.pickone(suffixOptions);
			const slug = `${slugify(descriptor)}-${slugify(suffixWord)}`;
			const assetType = chance.pickone(assetExtensions);
			const amount = integer(1, 100);
			const quantity = integer(1, 5);

			cart.push({
				amount,
				quantity,
				total_value: amount * quantity,
				featured: chance.pickone([true, false, false]),
				category,
				descriptor,
				slug,
				assetPreview: `https://example.com/assets/${slug}${assetType}`,
				assetType
			});
		}

		// the cart is handed back behind one more function call, wrapped in an outer array
		return () => [cart];
	};
}
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
export default config;
|