make-mp-data 2.0.22 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dungeons/ai-chat-analytics-ed.js +274 -0
- package/dungeons/business.js +0 -1
- package/dungeons/complex.js +0 -1
- package/dungeons/experiments.js +0 -1
- package/dungeons/gaming.js +47 -14
- package/dungeons/media.js +5 -6
- package/dungeons/mil.js +296 -0
- package/dungeons/money2020-ed-also.js +277 -0
- package/dungeons/money2020-ed.js +579 -0
- package/dungeons/sanity.js +0 -1
- package/dungeons/scd.js +0 -1
- package/dungeons/simple.js +57 -18
- package/dungeons/student-teacher.js +0 -1
- package/dungeons/text-generation.js +706 -0
- package/dungeons/userAgent.js +1 -2
- package/entry.js +4 -0
- package/index.js +63 -38
- package/lib/cli/cli.js +7 -8
- package/lib/core/config-validator.js +11 -13
- package/lib/core/context.js +13 -1
- package/lib/core/storage.js +45 -13
- package/lib/generators/adspend.js +1 -1
- package/lib/generators/events.js +18 -17
- package/lib/generators/funnels.js +293 -240
- package/lib/generators/text-bak-old.js +1121 -0
- package/lib/generators/text.js +1173 -0
- package/lib/orchestrators/mixpanel-sender.js +1 -1
- package/lib/templates/abbreviated.d.ts +13 -3
- package/lib/templates/defaults.js +311 -169
- package/lib/templates/hooks-instructions.txt +434 -0
- package/lib/templates/phrases-bak.js +925 -0
- package/lib/templates/phrases.js +2066 -0
- package/lib/templates/{instructions.txt → schema-instructions.txt} +78 -1
- package/lib/templates/scratch-dungeon-template.js +1 -1
- package/lib/templates/textQuickTest.js +172 -0
- package/lib/utils/ai.js +51 -2
- package/lib/utils/utils.js +145 -7
- package/package.json +8 -5
- package/types.d.ts +322 -7
- package/lib/utils/chart.js +0 -206
|
@@ -58,7 +58,84 @@ Core Requirements:
|
|
|
58
58
|
- Use lookup tables if events reference external entities with their own attributes (e.g., product_id, video_id).
|
|
59
59
|
- Use funnel conditions when different user segments or cohorts should have different behavioral patterns (e.g., premium vs free users, students vs teachers, rider vs driver, doctor vs patient).
|
|
60
60
|
|
|
61
|
-
4. Available Functions: You have access to these built-in functions: date, weighNumRange, range, and
|
|
61
|
+
4. Available Functions: You have access to these built-in functions: date, weighNumRange, range, chance library methods, and createGenerator for structured text generation.
|
|
62
|
+
|
|
63
|
+
5. Structured Text Generation: When your use case involves user-generated content (reviews, comments, support tickets, chat messages, social media posts, etc.), use createGenerator() to produce realistic, contextual text.
|
|
64
|
+
|
|
65
|
+
Text Generator Usage:
|
|
66
|
+
|
|
67
|
+
- createGenerator() creates a text generator object that produces varied, authentic-sounding text
|
|
68
|
+
- Embed the createGenerator() call directly in the properties object (NOT as a quoted string)
|
|
69
|
+
- The generator automatically produces unique text for each event
|
|
70
|
+
|
|
71
|
+
Example for a customer support platform:
|
|
72
|
+
|
|
73
|
+
{
|
|
74
|
+
event: "support_ticket_submitted",
|
|
75
|
+
properties: {
|
|
76
|
+
ticket_text: createGenerator({
|
|
77
|
+
style: "support",
|
|
78
|
+
tone: "neg",
|
|
79
|
+
formality: "business",
|
|
80
|
+
keywords: {
|
|
81
|
+
features: ["Dashboard", "Export API", "User Management"],
|
|
82
|
+
technical: ["timeout", "authentication", "database error"],
|
|
83
|
+
errors: ["500 Internal Server", "TIMEOUT_ERROR"]
|
|
84
|
+
},
|
|
85
|
+
min: 80,
|
|
86
|
+
max: 300
|
|
87
|
+
}),
|
|
88
|
+
priority: ["low", "medium", "high", "critical"]
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
Example for a social media platform:
|
|
93
|
+
|
|
94
|
+
{
|
|
95
|
+
event: "comment_posted",
|
|
96
|
+
properties: {
|
|
97
|
+
comment_text: createGenerator({
|
|
98
|
+
style: "chat",
|
|
99
|
+
tone: "pos",
|
|
100
|
+
formality: "casual",
|
|
101
|
+
keywords: {
|
|
102
|
+
products: ["the app", "this feature"],
|
|
103
|
+
emotions: ["love it", "amazing", "great job"]
|
|
104
|
+
},
|
|
105
|
+
typos: true,
|
|
106
|
+
typoRate: 0.03,
|
|
107
|
+
min: 10,
|
|
108
|
+
max: 150
|
|
109
|
+
}),
|
|
110
|
+
post_type: ["text", "image", "video"]
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
Text Generator Options:
|
|
115
|
+
|
|
116
|
+
- style: "support", "review", "forum", "search", "feedback", "chat", "comments", "tweet", "email"
|
|
117
|
+
- tone: "pos" (positive), "neg" (negative), "neu" (neutral)
|
|
118
|
+
- formality: "casual", "business", "technical"
|
|
119
|
+
- keywords: Object with arrays of domain-specific terms to include (features, products, technical, errors, etc.)
|
|
120
|
+
- min/max: Character length range
|
|
121
|
+
- typos: true/false (adds realistic typos)
|
|
122
|
+
- typoRate: 0.01 to 0.1 (typo rate as a fraction, e.g. 0.03 = 3%)
|
|
123
|
+
- mixedSentiment: true/false (varies sentiment within text)
|
|
124
|
+
- authenticityLevel: 0.0 to 1.0 (how "real" the text sounds)
|
|
125
|
+
- specificityLevel: 0.0 to 1.0 (level of detail)
|
|
126
|
+
|
|
127
|
+
When to Use createGenerator:
|
|
128
|
+
|
|
129
|
+
- Customer support tickets and responses
|
|
130
|
+
- Product reviews and ratings
|
|
131
|
+
- Social media posts (tweets, LinkedIn, Reddit)
|
|
132
|
+
- Forum discussions and comments
|
|
133
|
+
- Chat messages and live support
|
|
134
|
+
- Search queries
|
|
135
|
+
- User feedback and feature requests
|
|
136
|
+
- Email communications
|
|
137
|
+
- Bug reports
|
|
138
|
+
- Any scenario involving user-written or generated text content
|
|
62
139
|
|
|
63
140
|
❌ Critical Rules to Follow:
|
|
64
141
|
|
|
@@ -8,7 +8,7 @@ dayjs.extend(utc);
|
|
|
8
8
|
import "dotenv/config";
|
|
9
9
|
import { weighNumRange, range, date, initChance, exhaust, choose, integer, decimal, odds } from "../utils/utils.js";
|
|
10
10
|
const { NODE_ENV = "unknown" } = process.env;
|
|
11
|
-
|
|
11
|
+
import * as u from '../utils/utils.js'
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
/**
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import { createGenerator, generateBatch } from "../generators/text.js";
|
|
2
|
+
|
|
3
|
+
function main() {
|
|
4
|
+
// Enterprise support ticket generator with keywords and high authenticity
|
|
5
|
+
const enterpriseSupportGen = createGenerator({
|
|
6
|
+
style: "support",
|
|
7
|
+
tone: "neg",
|
|
8
|
+
intensity: "high",
|
|
9
|
+
formality: "technical",
|
|
10
|
+
keywords: {
|
|
11
|
+
features: ['Dashboard Analytics', 'Export API', 'SSO Login', 'Admin Console', 'User Management'],
|
|
12
|
+
products: ['DataViz Pro', 'Enterprise Suite', 'v3.2.1', 'v2.8.4'],
|
|
13
|
+
technical: ['CORS error', 'timeout', 'memory leak', 'authentication', 'database'],
|
|
14
|
+
errors: ['ERR_CONNECTION_REFUSED', '500 Internal Server', 'TIMEOUT_ERROR', 'AUTH_FAILED'],
|
|
15
|
+
competitors: ['Tableau', 'PowerBI', 'Looker', 'Qlik']
|
|
16
|
+
},
|
|
17
|
+
mixedSentiment: true,
|
|
18
|
+
authenticityLevel: 0.7,
|
|
19
|
+
typos: true,
|
|
20
|
+
typoRate: 0.02,
|
|
21
|
+
specificityLevel: 0.8,
|
|
22
|
+
min: 80,
|
|
23
|
+
max: 300,
|
|
24
|
+
includeMetadata: false,
|
|
25
|
+
// System is now always optimized for speed + uniqueness
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
const twentyEnterpriseSupport = enterpriseSupportGen.generateBatch({ n: 20 });
|
|
29
|
+
|
|
30
|
+
// Casual review generator with typos and mixed sentiment
|
|
31
|
+
const casualReviewGen = createGenerator({
|
|
32
|
+
style: "review",
|
|
33
|
+
tone: "pos",
|
|
34
|
+
intensity: "medium",
|
|
35
|
+
formality: "casual",
|
|
36
|
+
keywords: {
|
|
37
|
+
features: ['user interface', 'mobile app', 'notifications', 'search function'],
|
|
38
|
+
products: ['the app', 'this tool', 'the platform'],
|
|
39
|
+
metrics: ['response time', 'uptime', 'user experience']
|
|
40
|
+
},
|
|
41
|
+
mixedSentiment: true,
|
|
42
|
+
authenticityLevel: 0.4,
|
|
43
|
+
typos: true,
|
|
44
|
+
typoRate: 0.03,
|
|
45
|
+
sentimentDrift: 0.3,
|
|
46
|
+
min: 30,
|
|
47
|
+
max: 200,
|
|
48
|
+
includeMetadata: false
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
const twentyCasualReviews = casualReviewGen.generateBatch({ n: 20 });
|
|
52
|
+
|
|
53
|
+
// Technical forum posts with advanced features
|
|
54
|
+
const technicalForumGen = createGenerator({
|
|
55
|
+
style: "forum",
|
|
56
|
+
tone: "neu",
|
|
57
|
+
formality: "technical",
|
|
58
|
+
keywords: {
|
|
59
|
+
technical: ['REST API', 'GraphQL', 'webhooks', 'microservices', 'cloud infrastructure'],
|
|
60
|
+
versions: ['v1.2.3', 'latest', 'beta', 'stable release'],
|
|
61
|
+
errors: ['404 Not Found', 'Rate Limiting', 'SSL Certificate']
|
|
62
|
+
},
|
|
63
|
+
userPersona: true,
|
|
64
|
+
timestamps: true,
|
|
65
|
+
authenticityLevel: 0.6,
|
|
66
|
+
specificityLevel: 0.9,
|
|
67
|
+
min: 20,
|
|
68
|
+
max: 250,
|
|
69
|
+
includeMetadata: false
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
const twentyTechForms = technicalForumGen.generateBatch({ n: 20 });
|
|
73
|
+
|
|
74
|
+
// Search queries with realistic typos
|
|
75
|
+
const searchQueryGen = createGenerator({
|
|
76
|
+
style: "search",
|
|
77
|
+
tone: "neu",
|
|
78
|
+
formality: "casual",
|
|
79
|
+
keywords: {
|
|
80
|
+
features: ['export data', 'user settings', 'help docs', 'pricing'],
|
|
81
|
+
technical: ['API documentation', 'integration guide', 'troubleshooting']
|
|
82
|
+
},
|
|
83
|
+
typos: true,
|
|
84
|
+
typoRate: 0.05,
|
|
85
|
+
min: 2,
|
|
86
|
+
max: 50,
|
|
87
|
+
includeMetadata: false
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
const twentySearch = searchQueryGen.generateBatch({ n: 20 });
|
|
91
|
+
|
|
92
|
+
// Business feedback with professional tone
|
|
93
|
+
const businessFeedbackGen = createGenerator({
|
|
94
|
+
style: "feedback",
|
|
95
|
+
tone: "neu",
|
|
96
|
+
formality: "business",
|
|
97
|
+
keywords: {
|
|
98
|
+
metrics: ['ROI', 'efficiency', 'cost savings', 'productivity'],
|
|
99
|
+
features: ['reporting', 'analytics', 'integration capabilities']
|
|
100
|
+
},
|
|
101
|
+
authenticityLevel: 0.3,
|
|
102
|
+
specificityLevel: 0.7,
|
|
103
|
+
min: 40,
|
|
104
|
+
max: 180,
|
|
105
|
+
includeMetadata: false
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
const twentyFeedback = businessFeedbackGen.generateBatch({ n: 20 });
|
|
109
|
+
|
|
110
|
+
// Chat messages with high authenticity and typos
|
|
111
|
+
const chatMessageGen = createGenerator({
|
|
112
|
+
style: "chat",
|
|
113
|
+
tone: "neu",
|
|
114
|
+
formality: "casual",
|
|
115
|
+
keywords: {
|
|
116
|
+
products: ['the app', 'dashboard', 'mobile version'],
|
|
117
|
+
features: ['notifications', 'sync', 'offline mode']
|
|
118
|
+
},
|
|
119
|
+
mixedSentiment: true,
|
|
120
|
+
authenticityLevel: 0.8,
|
|
121
|
+
typos: true,
|
|
122
|
+
typoRate: 0.04,
|
|
123
|
+
sentimentDrift: 0.4,
|
|
124
|
+
min: 5,
|
|
125
|
+
max: 150,
|
|
126
|
+
includeMetadata: false
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
const twentyChatMsg = chatMessageGen.generateBatch({ n: 20 });
|
|
130
|
+
|
|
131
|
+
// Email communication generator
|
|
132
|
+
const emailGen = createGenerator({
|
|
133
|
+
style: "email",
|
|
134
|
+
tone: "neu",
|
|
135
|
+
formality: "business",
|
|
136
|
+
keywords: {
|
|
137
|
+
features: ['account management', 'billing', 'subscription'],
|
|
138
|
+
products: ['Enterprise Plan', 'Pro Account']
|
|
139
|
+
},
|
|
140
|
+
authenticityLevel: 0.5,
|
|
141
|
+
userPersona: true,
|
|
142
|
+
min: 60,
|
|
143
|
+
max: 300,
|
|
144
|
+
includeMetadata: false
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
const twentyEmails = emailGen.generateBatch({ n: 20 });
|
|
148
|
+
|
|
149
|
+
return {
|
|
150
|
+
email: twentyEmails,
|
|
151
|
+
chat: twentyChatMsg,
|
|
152
|
+
feedback: twentyFeedback,
|
|
153
|
+
search: twentySearch,
|
|
154
|
+
tech: twentyTechForms,
|
|
155
|
+
casual: twentyCasualReviews,
|
|
156
|
+
enterprise: twentyEnterpriseSupport
|
|
157
|
+
};
|
|
158
|
+
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
const result = main();
|
|
163
|
+
|
|
164
|
+
for (const key in result) {
|
|
165
|
+
console.log(`${key?.toUpperCase()}`);
|
|
166
|
+
console.log(`----------`)
|
|
167
|
+
const toShow = result[key]?.join("\n\n");
|
|
168
|
+
console.log(toShow)
|
|
169
|
+
console.log(`----------\n\n`)
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// debugger;
|
package/lib/utils/ai.js
CHANGED
|
@@ -21,12 +21,28 @@ CURRENT_PROMPT = `build me a dungeon stream with these events and structure
|
|
|
21
21
|
but use all the different mcdonalds products as a possible values`;
|
|
22
22
|
CURRENT_PROMPT = ``;
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
export async function ask(question) {
|
|
25
|
+
const prompt = question;
|
|
26
|
+
const ai = new AITransformer({
|
|
27
|
+
apiKey: API_KEY,
|
|
28
|
+
onlyJSON: false,
|
|
29
|
+
responseSchema: {
|
|
30
|
+
type: "string"
|
|
31
|
+
},
|
|
32
|
+
modelName: "gemini-2.5-flash-lite"
|
|
33
|
+
});
|
|
34
|
+
await ai.init();
|
|
35
|
+
const response = await ai.message(prompt);
|
|
36
|
+
// if (NODE_ENV === "dev") {
|
|
37
|
+
// debugger;
|
|
38
|
+
// }
|
|
39
|
+
return response?.toString()?.trim();
|
|
40
|
+
}
|
|
25
41
|
|
|
26
42
|
async function main(params) {
|
|
27
43
|
const { prompt } = params;
|
|
28
44
|
if (!prompt) throw new Error("Please provide a prompt");
|
|
29
|
-
let INSTRUCTIONS = await u.load('./lib/templates/instructions.txt', false);
|
|
45
|
+
let INSTRUCTIONS = await u.load('./lib/templates/schema-instructions.txt', false);
|
|
30
46
|
const TYPES = await u.load('./lib/templates/abbreviated.d.ts', false);
|
|
31
47
|
const VERBOSE_SCHEMA_FILE = await u.load('./lib/templates/verbose-schema.js', false);
|
|
32
48
|
const VERBOSE_SCHEMA = VERBOSE_SCHEMA_FILE.split(`//SPLIT HERE`).pop()?.trim() || ``;
|
|
@@ -53,6 +69,39 @@ async function main(params) {
|
|
|
53
69
|
|
|
54
70
|
}
|
|
55
71
|
|
|
72
|
+
/**
|
|
73
|
+
* Generate AI hooks based on current schema and user's desired trends
|
|
74
|
+
* @param {object} params - Parameters object
|
|
75
|
+
* @param {string} params.prompt - User's description of desired trends
|
|
76
|
+
* @param {object} params.currentSchema - The existing dungeon schema
|
|
77
|
+
* @returns {Promise<string>} Generated hook function code
|
|
78
|
+
*/
|
|
79
|
+
export async function generateAIHooks(params) {
|
|
80
|
+
const { prompt, currentSchema } = params;
|
|
81
|
+
if (!prompt) throw new Error("Please provide a prompt describing the trends you want");
|
|
82
|
+
if (!currentSchema) throw new Error("Please provide the current schema");
|
|
83
|
+
|
|
84
|
+
// Load hooks instructions template
|
|
85
|
+
let HOOKS_INSTRUCTIONS = await u.load('./lib/templates/hooks-instructions.txt', false);
|
|
86
|
+
|
|
87
|
+
// Format the current schema as a readable string
|
|
88
|
+
const schemaString = JSON.stringify(currentSchema, null, 2);
|
|
89
|
+
|
|
90
|
+
// Replace placeholder with actual schema
|
|
91
|
+
HOOKS_INSTRUCTIONS = HOOKS_INSTRUCTIONS.replace(/<CURRENT_SCHEMA>/g, schemaString);
|
|
92
|
+
|
|
93
|
+
const ai = new AITransformer({
|
|
94
|
+
apiKey: API_KEY,
|
|
95
|
+
onlyJSON: false,
|
|
96
|
+
systemInstructions: HOOKS_INSTRUCTIONS?.trim(),
|
|
97
|
+
modelName: "gemini-2.5-pro",
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
await ai.init();
|
|
101
|
+
const response = await ai.message(prompt);
|
|
102
|
+
|
|
103
|
+
return response?.toString()?.trim();
|
|
104
|
+
}
|
|
56
105
|
|
|
57
106
|
export default main;
|
|
58
107
|
|
package/lib/utils/utils.js
CHANGED
|
@@ -8,10 +8,11 @@ import utc from 'dayjs/plugin/utc.js';
|
|
|
8
8
|
import path from 'path';
|
|
9
9
|
import { mkdir, parseGCSUri } from 'ak-tools';
|
|
10
10
|
import { existsSync } from 'fs';
|
|
11
|
+
import zlib from 'zlib';
|
|
11
12
|
dayjs.extend(utc);
|
|
12
13
|
import 'dotenv/config';
|
|
13
14
|
import { domainSuffix, domainPrefix } from '../templates/defaults.js';
|
|
14
|
-
const {NODE_ENV = "unknown"} = process.env;
|
|
15
|
+
const { NODE_ENV = "unknown" } = process.env;
|
|
15
16
|
|
|
16
17
|
/** @typedef {import('../../types').Dungeon} Config */
|
|
17
18
|
/** @typedef {import('../../types').EventConfig} EventConfig */
|
|
@@ -207,6 +208,14 @@ function choose(value) {
|
|
|
207
208
|
|
|
208
209
|
}
|
|
209
210
|
|
|
211
|
+
// if the thing has a .next() method, call that
|
|
212
|
+
try {
|
|
213
|
+
if (value && typeof value.next === 'function') {
|
|
214
|
+
return value.next();
|
|
215
|
+
}
|
|
216
|
+
} catch (e) {
|
|
217
|
+
console.error(`Error occurred while calling next(): ${e}`);
|
|
218
|
+
}
|
|
210
219
|
|
|
211
220
|
try {
|
|
212
221
|
// Keep resolving the value if it's a function (with caching)
|
|
@@ -483,15 +492,22 @@ STREAMERS
|
|
|
483
492
|
----
|
|
484
493
|
*/
|
|
485
494
|
|
|
486
|
-
function streamJSON(filePath, data) {
|
|
495
|
+
function streamJSON(filePath, data, options = {}) {
|
|
487
496
|
return new Promise((resolve, reject) => {
|
|
488
497
|
let writeStream;
|
|
498
|
+
const { gzip = false } = options;
|
|
499
|
+
|
|
489
500
|
if (filePath?.startsWith('gs://')) {
|
|
490
501
|
const { uri, bucket, file } = parseGCSUri(filePath);
|
|
491
502
|
writeStream = storage.bucket(bucket).file(file).createWriteStream({ gzip: true });
|
|
492
503
|
}
|
|
493
504
|
else {
|
|
494
505
|
writeStream = fs.createWriteStream(filePath, { encoding: 'utf8' });
|
|
506
|
+
if (gzip) {
|
|
507
|
+
const gzipStream = zlib.createGzip();
|
|
508
|
+
gzipStream.pipe(writeStream);
|
|
509
|
+
writeStream = gzipStream;
|
|
510
|
+
}
|
|
495
511
|
}
|
|
496
512
|
data.forEach(item => {
|
|
497
513
|
writeStream.write(JSON.stringify(item) + '\n');
|
|
@@ -504,15 +520,22 @@ function streamJSON(filePath, data) {
|
|
|
504
520
|
});
|
|
505
521
|
}
|
|
506
522
|
|
|
507
|
-
function streamCSV(filePath, data) {
|
|
523
|
+
function streamCSV(filePath, data, options = {}) {
|
|
508
524
|
return new Promise((resolve, reject) => {
|
|
509
525
|
let writeStream;
|
|
526
|
+
const { gzip = false } = options;
|
|
527
|
+
|
|
510
528
|
if (filePath?.startsWith('gs://')) {
|
|
511
529
|
const { uri, bucket, file } = parseGCSUri(filePath);
|
|
512
530
|
writeStream = storage.bucket(bucket).file(file).createWriteStream({ gzip: true });
|
|
513
531
|
}
|
|
514
532
|
else {
|
|
515
533
|
writeStream = fs.createWriteStream(filePath, { encoding: 'utf8' });
|
|
534
|
+
if (gzip) {
|
|
535
|
+
const gzipStream = zlib.createGzip();
|
|
536
|
+
gzipStream.pipe(writeStream);
|
|
537
|
+
writeStream = gzipStream;
|
|
538
|
+
}
|
|
516
539
|
}
|
|
517
540
|
|
|
518
541
|
// Extract all unique keys from the data array
|
|
@@ -539,6 +562,120 @@ function streamCSV(filePath, data) {
|
|
|
539
562
|
});
|
|
540
563
|
}
|
|
541
564
|
|
|
565
|
+
async function streamParquet(filePath, data, options = {}) {
|
|
566
|
+
const { gzip = false } = options;
|
|
567
|
+
|
|
568
|
+
// Dynamically import hyparquet-writer
|
|
569
|
+
// @ts-ignore
|
|
570
|
+
const { parquetWriteFile, parquetWriteBuffer } = await import('hyparquet-writer');
|
|
571
|
+
|
|
572
|
+
if (data.length === 0) {
|
|
573
|
+
throw new Error('Cannot write parquet file with empty data');
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
// Extract column names and data from the input array
|
|
577
|
+
const columns = getUniqueKeys(data);
|
|
578
|
+
const columnData = columns.map(columnName => {
|
|
579
|
+
const columnValues = data.map(row => {
|
|
580
|
+
let value = row[columnName];
|
|
581
|
+
|
|
582
|
+
// Handle null/undefined values
|
|
583
|
+
if (value === null || value === undefined) {
|
|
584
|
+
return null;
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
// Convert objects to strings
|
|
588
|
+
if (typeof value === 'object') {
|
|
589
|
+
value = JSON.stringify(value);
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
return value;
|
|
593
|
+
});
|
|
594
|
+
|
|
595
|
+
// Determine the type based on the first non-null value
|
|
596
|
+
let type = 'STRING'; // default
|
|
597
|
+
const firstValue = columnValues.find(v => v !== null && v !== undefined);
|
|
598
|
+
|
|
599
|
+
if (firstValue !== undefined) {
|
|
600
|
+
if (typeof firstValue === 'boolean') {
|
|
601
|
+
type = 'BOOLEAN';
|
|
602
|
+
} else if (typeof firstValue === 'number') {
|
|
603
|
+
// For parquet compatibility, convert numbers to appropriate types
|
|
604
|
+
if (Number.isInteger(firstValue)) {
|
|
605
|
+
// Use INT32 for smaller integers, convert to BigInt for INT64 if needed
|
|
606
|
+
if (firstValue >= -2147483648 && firstValue <= 2147483647) {
|
|
607
|
+
type = 'INT32';
|
|
608
|
+
} else {
|
|
609
|
+
type = 'INT64';
|
|
610
|
+
// Convert all values to BigInt for INT64
|
|
611
|
+
for (let i = 0; i < columnValues.length; i++) {
|
|
612
|
+
if (columnValues[i] !== null && columnValues[i] !== undefined) {
|
|
613
|
+
columnValues[i] = BigInt(columnValues[i]);
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
} else {
|
|
618
|
+
type = 'DOUBLE';
|
|
619
|
+
}
|
|
620
|
+
} else if (firstValue instanceof Date) {
|
|
621
|
+
type = 'TIMESTAMP';
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
return {
|
|
626
|
+
name: columnName,
|
|
627
|
+
data: columnValues,
|
|
628
|
+
type: type
|
|
629
|
+
};
|
|
630
|
+
});
|
|
631
|
+
|
|
632
|
+
if (filePath?.startsWith('gs://')) {
|
|
633
|
+
// For GCS, write to buffer first, then upload
|
|
634
|
+
// @ts-ignore
|
|
635
|
+
const arrayBuffer = parquetWriteBuffer({ columnData });
|
|
636
|
+
const { bucket, file } = parseGCSUri(filePath);
|
|
637
|
+
|
|
638
|
+
const writeStream = storage.bucket(bucket).file(file).createWriteStream({
|
|
639
|
+
gzip: gzip || true // Always gzip for GCS
|
|
640
|
+
});
|
|
641
|
+
|
|
642
|
+
return new Promise((resolve, reject) => {
|
|
643
|
+
writeStream.write(Buffer.from(arrayBuffer));
|
|
644
|
+
writeStream.end();
|
|
645
|
+
writeStream.on('finish', () => resolve(filePath));
|
|
646
|
+
writeStream.on('error', reject);
|
|
647
|
+
});
|
|
648
|
+
} else {
|
|
649
|
+
// For local files
|
|
650
|
+
let actualFilePath = filePath;
|
|
651
|
+
if (gzip && !filePath.endsWith('.gz')) {
|
|
652
|
+
actualFilePath = filePath + '.gz';
|
|
653
|
+
}
|
|
654
|
+
|
|
655
|
+
if (gzip) {
|
|
656
|
+
// Write to buffer then gzip to disk
|
|
657
|
+
// @ts-ignore
|
|
658
|
+
const arrayBuffer = parquetWriteBuffer({ columnData });
|
|
659
|
+
const buffer = Buffer.from(arrayBuffer);
|
|
660
|
+
const gzippedBuffer = zlib.gzipSync(buffer);
|
|
661
|
+
|
|
662
|
+
return new Promise((resolve, reject) => {
|
|
663
|
+
fs.writeFile(actualFilePath, gzippedBuffer, (err) => {
|
|
664
|
+
if (err) reject(err);
|
|
665
|
+
else resolve(actualFilePath);
|
|
666
|
+
});
|
|
667
|
+
});
|
|
668
|
+
} else {
|
|
669
|
+
// Direct write to disk
|
|
670
|
+
parquetWriteFile({
|
|
671
|
+
filename: filePath,
|
|
672
|
+
columnData
|
|
673
|
+
});
|
|
674
|
+
return Promise.resolve(filePath);
|
|
675
|
+
}
|
|
676
|
+
}
|
|
677
|
+
}
|
|
678
|
+
|
|
542
679
|
|
|
543
680
|
/*
|
|
544
681
|
----
|
|
@@ -848,7 +985,7 @@ function validEvent(row) {
|
|
|
848
985
|
if (!row.time) return false;
|
|
849
986
|
if (!row.device_id && !row.user_id) return false;
|
|
850
987
|
if (!row.insert_id) return false;
|
|
851
|
-
if (!row.source) return false;
|
|
988
|
+
// if (!row.source) return false;
|
|
852
989
|
if (typeof row.time !== 'string') return false;
|
|
853
990
|
return true;
|
|
854
991
|
}
|
|
@@ -870,7 +1007,7 @@ function buildFileNames(config) {
|
|
|
870
1007
|
let extension = "";
|
|
871
1008
|
extension = format === "csv" ? "csv" : "json";
|
|
872
1009
|
// const current = dayjs.utc().format("MM-DD-HH");
|
|
873
|
-
let simName = config.
|
|
1010
|
+
let simName = config.name;
|
|
874
1011
|
let writeDir = typeof config.writeToDisk === 'string' ? config.writeToDisk : "./";
|
|
875
1012
|
if (config.writeToDisk) {
|
|
876
1013
|
const dataFolder = path.resolve("./data");
|
|
@@ -1235,7 +1372,7 @@ function wrapFunc(obj, func, recursion = 0, parentKey = null, grandParentKey = n
|
|
|
1235
1372
|
// }
|
|
1236
1373
|
|
|
1237
1374
|
const chance = getChance();
|
|
1238
|
-
function odds(num) {
|
|
1375
|
+
function odds(num) {
|
|
1239
1376
|
return chance.bool({ likelihood: num });
|
|
1240
1377
|
}
|
|
1241
1378
|
|
|
@@ -1328,7 +1465,7 @@ export {
|
|
|
1328
1465
|
TimeSoup,
|
|
1329
1466
|
companyName,
|
|
1330
1467
|
generateEmoji,
|
|
1331
|
-
hasSameKeys
|
|
1468
|
+
hasSameKeys,
|
|
1332
1469
|
deepClone,
|
|
1333
1470
|
initChance,
|
|
1334
1471
|
getChance,
|
|
@@ -1362,6 +1499,7 @@ export {
|
|
|
1362
1499
|
buildFileNames,
|
|
1363
1500
|
streamJSON,
|
|
1364
1501
|
streamCSV,
|
|
1502
|
+
streamParquet,
|
|
1365
1503
|
datesBetween,
|
|
1366
1504
|
weighChoices,
|
|
1367
1505
|
wrapFunc,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "make-mp-data",
|
|
3
|
-
"version": "2.0.22",
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"description": "builds all mixpanel primitives for a given project",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -31,7 +31,8 @@
|
|
|
31
31
|
"test:quick": "node ./tests/benchmark/quick-test.mjs",
|
|
32
32
|
"exp:soup": "node ./tests/testSoup.mjs",
|
|
33
33
|
"func:local": "functions-framework --target=entry",
|
|
34
|
-
"func:deploy": "./scripts/deploy.sh"
|
|
34
|
+
"func:deploy": "./scripts/deploy.sh",
|
|
35
|
+
"textGen": "node ./lib/templates/textQuickTest.js"
|
|
35
36
|
},
|
|
36
37
|
"repository": {
|
|
37
38
|
"type": "git",
|
|
@@ -65,13 +66,15 @@
|
|
|
65
66
|
"ak-gemini": "^1.0.59",
|
|
66
67
|
"ak-tools": "^1.1.12",
|
|
67
68
|
"chance": "^1.1.11",
|
|
68
|
-
"chart.js": "^3.9.1",
|
|
69
|
-
"chartjs-node-canvas": "^4.1.6",
|
|
70
69
|
"dayjs": "^1.11.11",
|
|
71
70
|
"dotenv": "^16.4.5",
|
|
72
71
|
"google-auth-library": "^9.15.0",
|
|
73
|
-
"
|
|
72
|
+
"hyparquet-writer": "^0.6.1",
|
|
73
|
+
"mixpanel-import": "^3.0.0",
|
|
74
74
|
"p-limit": "^3.1.0",
|
|
75
|
+
"seedrandom": "^3.0.5",
|
|
76
|
+
"sentiment": "^5.0.2",
|
|
77
|
+
"tracery-grammar": "^2.8.4",
|
|
75
78
|
"yargs": "^17.7.2"
|
|
76
79
|
},
|
|
77
80
|
"devDependencies": {
|