make-mp-data 2.0.23 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dungeons/ai-chat-analytics-ed.js +274 -0
- package/dungeons/business.js +0 -1
- package/dungeons/complex.js +0 -1
- package/dungeons/experiments.js +0 -1
- package/dungeons/gaming.js +47 -14
- package/dungeons/media.js +5 -6
- package/dungeons/mil.js +296 -0
- package/dungeons/money2020-ed-also.js +277 -0
- package/dungeons/money2020-ed.js +579 -0
- package/dungeons/sanity.js +0 -1
- package/dungeons/scd.js +0 -1
- package/dungeons/simple.js +57 -18
- package/dungeons/student-teacher.js +0 -1
- package/dungeons/text-generation.js +706 -0
- package/dungeons/userAgent.js +1 -2
- package/entry.js +3 -0
- package/index.js +13 -40
- package/lib/cli/cli.js +0 -7
- package/lib/core/config-validator.js +6 -8
- package/lib/generators/adspend.js +1 -1
- package/lib/generators/events.js +1 -1
- package/lib/generators/funnels.js +293 -242
- package/lib/generators/text-bak-old.js +1121 -0
- package/lib/generators/text.js +1173 -0
- package/lib/orchestrators/mixpanel-sender.js +1 -1
- package/lib/templates/abbreviated.d.ts +13 -3
- package/lib/templates/defaults.js +311 -169
- package/lib/templates/hooks-instructions.txt +434 -0
- package/lib/templates/phrases-bak.js +925 -0
- package/lib/templates/phrases.js +2066 -0
- package/lib/templates/{instructions.txt → schema-instructions.txt} +78 -1
- package/lib/templates/scratch-dungeon-template.js +1 -1
- package/lib/templates/textQuickTest.js +172 -0
- package/lib/utils/ai.js +51 -2
- package/lib/utils/utils.js +29 -18
- package/package.json +7 -5
- package/types.d.ts +319 -4
- package/lib/utils/chart.js +0 -206
|
@@ -58,7 +58,84 @@ Core Requirements:
|
|
|
58
58
|
- Use lookup tables if events reference external entities with their own attributes (e.g., product_id, video_id).
|
|
59
59
|
- Use funnel conditions when different user segments or cohorts should have different behavioral patterns (e.g., premium vs free users, students vs teachers, rider vs driver, doctor vs patient).
|
|
60
60
|
|
|
61
|
-
4. Available Functions: You have access to these built-in functions: date, weighNumRange, range, and chance library methods.
|
|
61
|
+
4. Available Functions: You have access to these built-in functions: date, weighNumRange, range, chance library methods, and createGenerator for structured text generation.
|
|
62
|
+
|
|
63
|
+
5. Structured Text Generation: When your use case involves user-generated content (reviews, comments, support tickets, chat messages, social media posts, etc.), use createGenerator() to produce realistic, contextual text.
|
|
64
|
+
|
|
65
|
+
Text Generator Usage:
|
|
66
|
+
|
|
67
|
+
- createGenerator() creates a text generator object that produces varied, authentic-sounding text
|
|
68
|
+
- Embed the createGenerator() call directly in the properties object (NOT as a quoted string)
|
|
69
|
+
- The generator automatically produces unique text for each event
|
|
70
|
+
|
|
71
|
+
Example for a customer support platform:
|
|
72
|
+
|
|
73
|
+
{
|
|
74
|
+
event: "support_ticket_submitted",
|
|
75
|
+
properties: {
|
|
76
|
+
ticket_text: createGenerator({
|
|
77
|
+
style: "support",
|
|
78
|
+
tone: "neg",
|
|
79
|
+
formality: "business",
|
|
80
|
+
keywords: {
|
|
81
|
+
features: ["Dashboard", "Export API", "User Management"],
|
|
82
|
+
technical: ["timeout", "authentication", "database error"],
|
|
83
|
+
errors: ["500 Internal Server", "TIMEOUT_ERROR"]
|
|
84
|
+
},
|
|
85
|
+
min: 80,
|
|
86
|
+
max: 300
|
|
87
|
+
}),
|
|
88
|
+
priority: ["low", "medium", "high", "critical"]
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
Example for a social media platform:
|
|
93
|
+
|
|
94
|
+
{
|
|
95
|
+
event: "comment_posted",
|
|
96
|
+
properties: {
|
|
97
|
+
comment_text: createGenerator({
|
|
98
|
+
style: "chat",
|
|
99
|
+
tone: "pos",
|
|
100
|
+
formality: "casual",
|
|
101
|
+
keywords: {
|
|
102
|
+
products: ["the app", "this feature"],
|
|
103
|
+
emotions: ["love it", "amazing", "great job"]
|
|
104
|
+
},
|
|
105
|
+
typos: true,
|
|
106
|
+
typoRate: 0.03,
|
|
107
|
+
min: 10,
|
|
108
|
+
max: 150
|
|
109
|
+
}),
|
|
110
|
+
post_type: ["text", "image", "video"]
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
Text Generator Options:
|
|
115
|
+
|
|
116
|
+
- style: "support", "review", "forum", "search", "feedback", "chat", "comments", "tweet", "email"
|
|
117
|
+
- tone: "pos" (positive), "neg" (negative), "neu" (neutral)
|
|
118
|
+
- formality: "casual", "business", "technical"
|
|
119
|
+
- keywords: Object with arrays of domain-specific terms to include (features, products, technical, errors, etc.)
|
|
120
|
+
- min/max: Character length range
|
|
121
|
+
- typos: true/false (adds realistic typos)
|
|
122
|
+
- typoRate: 0.01 to 0.1 (percentage of typos)
|
|
123
|
+
- mixedSentiment: true/false (varies sentiment within text)
|
|
124
|
+
- authenticityLevel: 0.0 to 1.0 (how "real" the text sounds)
|
|
125
|
+
- specificityLevel: 0.0 to 1.0 (level of detail)
|
|
126
|
+
|
|
127
|
+
When to Use createGenerator:
|
|
128
|
+
|
|
129
|
+
- Customer support tickets and responses
|
|
130
|
+
- Product reviews and ratings
|
|
131
|
+
- Social media posts (tweets, LinkedIn, Reddit)
|
|
132
|
+
- Forum discussions and comments
|
|
133
|
+
- Chat messages and live support
|
|
134
|
+
- Search queries
|
|
135
|
+
- User feedback and feature requests
|
|
136
|
+
- Email communications
|
|
137
|
+
- Bug reports
|
|
138
|
+
- Any scenario involving user-written or generated text content
|
|
62
139
|
|
|
63
140
|
❌ Critical Rules to Follow:
|
|
64
141
|
|
|
@@ -8,7 +8,7 @@ dayjs.extend(utc);
|
|
|
8
8
|
import "dotenv/config";
|
|
9
9
|
import { weighNumRange, range, date, initChance, exhaust, choose, integer, decimal, odds } from "../utils/utils.js";
|
|
10
10
|
const { NODE_ENV = "unknown" } = process.env;
|
|
11
|
-
|
|
11
|
+
import * as u from '../utils/utils.js'
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
/**
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import { createGenerator, generateBatch } from "../generators/text.js";
|
|
2
|
+
|
|
3
|
+
function main() {
|
|
4
|
+
// Enterprise support ticket generator with keywords and high authenticity
|
|
5
|
+
const enterpriseSupportGen = createGenerator({
|
|
6
|
+
style: "support",
|
|
7
|
+
tone: "neg",
|
|
8
|
+
intensity: "high",
|
|
9
|
+
formality: "technical",
|
|
10
|
+
keywords: {
|
|
11
|
+
features: ['Dashboard Analytics', 'Export API', 'SSO Login', 'Admin Console', 'User Management'],
|
|
12
|
+
products: ['DataViz Pro', 'Enterprise Suite', 'v3.2.1', 'v2.8.4'],
|
|
13
|
+
technical: ['CORS error', 'timeout', 'memory leak', 'authentication', 'database'],
|
|
14
|
+
errors: ['ERR_CONNECTION_REFUSED', '500 Internal Server', 'TIMEOUT_ERROR', 'AUTH_FAILED'],
|
|
15
|
+
competitors: ['Tableau', 'PowerBI', 'Looker', 'Qlik']
|
|
16
|
+
},
|
|
17
|
+
mixedSentiment: true,
|
|
18
|
+
authenticityLevel: 0.7,
|
|
19
|
+
typos: true,
|
|
20
|
+
typoRate: 0.02,
|
|
21
|
+
specificityLevel: 0.8,
|
|
22
|
+
min: 80,
|
|
23
|
+
max: 300,
|
|
24
|
+
includeMetadata: false,
|
|
25
|
+
// System is now always optimized for speed + uniqueness
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
const twentyEnterpriseSupport = enterpriseSupportGen.generateBatch({ n: 20 });
|
|
29
|
+
|
|
30
|
+
// Casual review generator with typos and mixed sentiment
|
|
31
|
+
const casualReviewGen = createGenerator({
|
|
32
|
+
style: "review",
|
|
33
|
+
tone: "pos",
|
|
34
|
+
intensity: "medium",
|
|
35
|
+
formality: "casual",
|
|
36
|
+
keywords: {
|
|
37
|
+
features: ['user interface', 'mobile app', 'notifications', 'search function'],
|
|
38
|
+
products: ['the app', 'this tool', 'the platform'],
|
|
39
|
+
metrics: ['response time', 'uptime', 'user experience']
|
|
40
|
+
},
|
|
41
|
+
mixedSentiment: true,
|
|
42
|
+
authenticityLevel: 0.4,
|
|
43
|
+
typos: true,
|
|
44
|
+
typoRate: 0.03,
|
|
45
|
+
sentimentDrift: 0.3,
|
|
46
|
+
min: 30,
|
|
47
|
+
max: 200,
|
|
48
|
+
includeMetadata: false
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
const twentyCasualReviews = casualReviewGen.generateBatch({ n: 20 });
|
|
52
|
+
|
|
53
|
+
// Technical forum posts with advanced features
|
|
54
|
+
const technicalForumGen = createGenerator({
|
|
55
|
+
style: "forum",
|
|
56
|
+
tone: "neu",
|
|
57
|
+
formality: "technical",
|
|
58
|
+
keywords: {
|
|
59
|
+
technical: ['REST API', 'GraphQL', 'webhooks', 'microservices', 'cloud infrastructure'],
|
|
60
|
+
versions: ['v1.2.3', 'latest', 'beta', 'stable release'],
|
|
61
|
+
errors: ['404 Not Found', 'Rate Limiting', 'SSL Certificate']
|
|
62
|
+
},
|
|
63
|
+
userPersona: true,
|
|
64
|
+
timestamps: true,
|
|
65
|
+
authenticityLevel: 0.6,
|
|
66
|
+
specificityLevel: 0.9,
|
|
67
|
+
min: 20,
|
|
68
|
+
max: 250,
|
|
69
|
+
includeMetadata: false
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
const twentyTechForms = technicalForumGen.generateBatch({ n: 20 });
|
|
73
|
+
|
|
74
|
+
// Search queries with realistic typos
|
|
75
|
+
const searchQueryGen = createGenerator({
|
|
76
|
+
style: "search",
|
|
77
|
+
tone: "neu",
|
|
78
|
+
formality: "casual",
|
|
79
|
+
keywords: {
|
|
80
|
+
features: ['export data', 'user settings', 'help docs', 'pricing'],
|
|
81
|
+
technical: ['API documentation', 'integration guide', 'troubleshooting']
|
|
82
|
+
},
|
|
83
|
+
typos: true,
|
|
84
|
+
typoRate: 0.05,
|
|
85
|
+
min: 2,
|
|
86
|
+
max: 50,
|
|
87
|
+
includeMetadata: false
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
const twentySearch = searchQueryGen.generateBatch({ n: 20 });
|
|
91
|
+
|
|
92
|
+
// Business feedback with professional tone
|
|
93
|
+
const businessFeedbackGen = createGenerator({
|
|
94
|
+
style: "feedback",
|
|
95
|
+
tone: "neu",
|
|
96
|
+
formality: "business",
|
|
97
|
+
keywords: {
|
|
98
|
+
metrics: ['ROI', 'efficiency', 'cost savings', 'productivity'],
|
|
99
|
+
features: ['reporting', 'analytics', 'integration capabilities']
|
|
100
|
+
},
|
|
101
|
+
authenticityLevel: 0.3,
|
|
102
|
+
specificityLevel: 0.7,
|
|
103
|
+
min: 40,
|
|
104
|
+
max: 180,
|
|
105
|
+
includeMetadata: false
|
|
106
|
+
});
|
|
107
|
+
|
|
108
|
+
const twentyFeedback = businessFeedbackGen.generateBatch({ n: 20 });
|
|
109
|
+
|
|
110
|
+
// Chat messages with high authenticity and typos
|
|
111
|
+
const chatMessageGen = createGenerator({
|
|
112
|
+
style: "chat",
|
|
113
|
+
tone: "neu",
|
|
114
|
+
formality: "casual",
|
|
115
|
+
keywords: {
|
|
116
|
+
products: ['the app', 'dashboard', 'mobile version'],
|
|
117
|
+
features: ['notifications', 'sync', 'offline mode']
|
|
118
|
+
},
|
|
119
|
+
mixedSentiment: true,
|
|
120
|
+
authenticityLevel: 0.8,
|
|
121
|
+
typos: true,
|
|
122
|
+
typoRate: 0.04,
|
|
123
|
+
sentimentDrift: 0.4,
|
|
124
|
+
min: 5,
|
|
125
|
+
max: 150,
|
|
126
|
+
includeMetadata: false
|
|
127
|
+
});
|
|
128
|
+
|
|
129
|
+
const twentyChatMsg = chatMessageGen.generateBatch({ n: 20 });
|
|
130
|
+
|
|
131
|
+
// Email communication generator
|
|
132
|
+
const emailGen = createGenerator({
|
|
133
|
+
style: "email",
|
|
134
|
+
tone: "neu",
|
|
135
|
+
formality: "business",
|
|
136
|
+
keywords: {
|
|
137
|
+
features: ['account management', 'billing', 'subscription'],
|
|
138
|
+
products: ['Enterprise Plan', 'Pro Account']
|
|
139
|
+
},
|
|
140
|
+
authenticityLevel: 0.5,
|
|
141
|
+
userPersona: true,
|
|
142
|
+
min: 60,
|
|
143
|
+
max: 300,
|
|
144
|
+
includeMetadata: false
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
const twentyEmails = emailGen.generateBatch({ n: 20 });
|
|
148
|
+
|
|
149
|
+
return {
|
|
150
|
+
email: twentyEmails,
|
|
151
|
+
chat: twentyChatMsg,
|
|
152
|
+
feedback: twentyFeedback,
|
|
153
|
+
search: twentySearch,
|
|
154
|
+
tech: twentyTechForms,
|
|
155
|
+
casual: twentyCasualReviews,
|
|
156
|
+
enterprise: twentyEnterpriseSupport
|
|
157
|
+
};
|
|
158
|
+
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
const result = main();
|
|
163
|
+
|
|
164
|
+
for (const key in result) {
|
|
165
|
+
console.log(`${key?.toUpperCase()}`);
|
|
166
|
+
console.log(`----------`)
|
|
167
|
+
const toShow = result[key]?.join("\n\n");
|
|
168
|
+
console.log(toShow)
|
|
169
|
+
console.log(`----------\n\n`)
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
// debugger;
|
package/lib/utils/ai.js
CHANGED
|
@@ -21,12 +21,28 @@ CURRENT_PROMPT = `build me a dungeon stream with these events and structure
|
|
|
21
21
|
but use all the different mcdonalds products as a possible values`;
|
|
22
22
|
CURRENT_PROMPT = ``;
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
export async function ask(question) {
|
|
25
|
+
const prompt = question;
|
|
26
|
+
const ai = new AITransformer({
|
|
27
|
+
apiKey: API_KEY,
|
|
28
|
+
onlyJSON: false,
|
|
29
|
+
responseSchema: {
|
|
30
|
+
type: "string"
|
|
31
|
+
},
|
|
32
|
+
modelName: "gemini-2.5-flash-lite"
|
|
33
|
+
});
|
|
34
|
+
await ai.init();
|
|
35
|
+
const response = await ai.message(prompt);
|
|
36
|
+
// if (NODE_ENV === "dev") {
|
|
37
|
+
// debugger;
|
|
38
|
+
// }
|
|
39
|
+
return response?.toString()?.trim();
|
|
40
|
+
}
|
|
25
41
|
|
|
26
42
|
async function main(params) {
|
|
27
43
|
const { prompt } = params;
|
|
28
44
|
if (!prompt) throw new Error("Please provide a prompt");
|
|
29
|
-
let INSTRUCTIONS = await u.load('./lib/templates/instructions.txt', false);
|
|
45
|
+
let INSTRUCTIONS = await u.load('./lib/templates/schema-instructions.txt', false);
|
|
30
46
|
const TYPES = await u.load('./lib/templates/abbreviated.d.ts', false);
|
|
31
47
|
const VERBOSE_SCHEMA_FILE = await u.load('./lib/templates/verbose-schema.js', false);
|
|
32
48
|
const VERBOSE_SCHEMA = VERBOSE_SCHEMA_FILE.split(`//SPLIT HERE`).pop()?.trim() || ``;
|
|
@@ -53,6 +69,39 @@ async function main(params) {
|
|
|
53
69
|
|
|
54
70
|
}
|
|
55
71
|
|
|
72
|
+
/**
|
|
73
|
+
* Generate AI hooks based on current schema and user's desired trends
|
|
74
|
+
* @param {object} params - Parameters object
|
|
75
|
+
* @param {string} params.prompt - User's description of desired trends
|
|
76
|
+
* @param {object} params.currentSchema - The existing dungeon schema
|
|
77
|
+
* @returns {Promise<string>} Generated hook function code
|
|
78
|
+
*/
|
|
79
|
+
export async function generateAIHooks(params) {
|
|
80
|
+
const { prompt, currentSchema } = params;
|
|
81
|
+
if (!prompt) throw new Error("Please provide a prompt describing the trends you want");
|
|
82
|
+
if (!currentSchema) throw new Error("Please provide the current schema");
|
|
83
|
+
|
|
84
|
+
// Load hooks instructions template
|
|
85
|
+
let HOOKS_INSTRUCTIONS = await u.load('./lib/templates/hooks-instructions.txt', false);
|
|
86
|
+
|
|
87
|
+
// Format the current schema as a readable string
|
|
88
|
+
const schemaString = JSON.stringify(currentSchema, null, 2);
|
|
89
|
+
|
|
90
|
+
// Replace placeholder with actual schema
|
|
91
|
+
HOOKS_INSTRUCTIONS = HOOKS_INSTRUCTIONS.replace(/<CURRENT_SCHEMA>/g, schemaString);
|
|
92
|
+
|
|
93
|
+
const ai = new AITransformer({
|
|
94
|
+
apiKey: API_KEY,
|
|
95
|
+
onlyJSON: false,
|
|
96
|
+
systemInstructions: HOOKS_INSTRUCTIONS?.trim(),
|
|
97
|
+
modelName: "gemini-2.5-pro",
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
await ai.init();
|
|
101
|
+
const response = await ai.message(prompt);
|
|
102
|
+
|
|
103
|
+
return response?.toString()?.trim();
|
|
104
|
+
}
|
|
56
105
|
|
|
57
106
|
export default main;
|
|
58
107
|
|
package/lib/utils/utils.js
CHANGED
|
@@ -12,7 +12,7 @@ import zlib from 'zlib';
|
|
|
12
12
|
dayjs.extend(utc);
|
|
13
13
|
import 'dotenv/config';
|
|
14
14
|
import { domainSuffix, domainPrefix } from '../templates/defaults.js';
|
|
15
|
-
const {NODE_ENV = "unknown"} = process.env;
|
|
15
|
+
const { NODE_ENV = "unknown" } = process.env;
|
|
16
16
|
|
|
17
17
|
/** @typedef {import('../../types').Dungeon} Config */
|
|
18
18
|
/** @typedef {import('../../types').EventConfig} EventConfig */
|
|
@@ -208,6 +208,14 @@ function choose(value) {
|
|
|
208
208
|
|
|
209
209
|
}
|
|
210
210
|
|
|
211
|
+
// if the thing has a .next() method, call that
|
|
212
|
+
try {
|
|
213
|
+
if (value && typeof value.next === 'function') {
|
|
214
|
+
return value.next();
|
|
215
|
+
}
|
|
216
|
+
} catch (e) {
|
|
217
|
+
console.error(`Error occurred while calling next(): ${e}`);
|
|
218
|
+
}
|
|
211
219
|
|
|
212
220
|
try {
|
|
213
221
|
// Keep resolving the value if it's a function (with caching)
|
|
@@ -488,7 +496,7 @@ function streamJSON(filePath, data, options = {}) {
|
|
|
488
496
|
return new Promise((resolve, reject) => {
|
|
489
497
|
let writeStream;
|
|
490
498
|
const { gzip = false } = options;
|
|
491
|
-
|
|
499
|
+
|
|
492
500
|
if (filePath?.startsWith('gs://')) {
|
|
493
501
|
const { uri, bucket, file } = parseGCSUri(filePath);
|
|
494
502
|
writeStream = storage.bucket(bucket).file(file).createWriteStream({ gzip: true });
|
|
@@ -516,7 +524,7 @@ function streamCSV(filePath, data, options = {}) {
|
|
|
516
524
|
return new Promise((resolve, reject) => {
|
|
517
525
|
let writeStream;
|
|
518
526
|
const { gzip = false } = options;
|
|
519
|
-
|
|
527
|
+
|
|
520
528
|
if (filePath?.startsWith('gs://')) {
|
|
521
529
|
const { uri, bucket, file } = parseGCSUri(filePath);
|
|
522
530
|
writeStream = storage.bucket(bucket).file(file).createWriteStream({ gzip: true });
|
|
@@ -556,10 +564,11 @@ function streamCSV(filePath, data, options = {}) {
|
|
|
556
564
|
|
|
557
565
|
async function streamParquet(filePath, data, options = {}) {
|
|
558
566
|
const { gzip = false } = options;
|
|
559
|
-
|
|
567
|
+
|
|
560
568
|
// Dynamically import hyparquet-writer
|
|
569
|
+
// @ts-ignore
|
|
561
570
|
const { parquetWriteFile, parquetWriteBuffer } = await import('hyparquet-writer');
|
|
562
|
-
|
|
571
|
+
|
|
563
572
|
if (data.length === 0) {
|
|
564
573
|
throw new Error('Cannot write parquet file with empty data');
|
|
565
574
|
}
|
|
@@ -569,24 +578,24 @@ async function streamParquet(filePath, data, options = {}) {
|
|
|
569
578
|
const columnData = columns.map(columnName => {
|
|
570
579
|
const columnValues = data.map(row => {
|
|
571
580
|
let value = row[columnName];
|
|
572
|
-
|
|
581
|
+
|
|
573
582
|
// Handle null/undefined values
|
|
574
583
|
if (value === null || value === undefined) {
|
|
575
584
|
return null;
|
|
576
585
|
}
|
|
577
|
-
|
|
586
|
+
|
|
578
587
|
// Convert objects to strings
|
|
579
588
|
if (typeof value === 'object') {
|
|
580
589
|
value = JSON.stringify(value);
|
|
581
590
|
}
|
|
582
|
-
|
|
591
|
+
|
|
583
592
|
return value;
|
|
584
593
|
});
|
|
585
|
-
|
|
594
|
+
|
|
586
595
|
// Determine the type based on the first non-null value
|
|
587
596
|
let type = 'STRING'; // default
|
|
588
597
|
const firstValue = columnValues.find(v => v !== null && v !== undefined);
|
|
589
|
-
|
|
598
|
+
|
|
590
599
|
if (firstValue !== undefined) {
|
|
591
600
|
if (typeof firstValue === 'boolean') {
|
|
592
601
|
type = 'BOOLEAN';
|
|
@@ -612,7 +621,7 @@ async function streamParquet(filePath, data, options = {}) {
|
|
|
612
621
|
type = 'TIMESTAMP';
|
|
613
622
|
}
|
|
614
623
|
}
|
|
615
|
-
|
|
624
|
+
|
|
616
625
|
return {
|
|
617
626
|
name: columnName,
|
|
618
627
|
data: columnValues,
|
|
@@ -622,13 +631,14 @@ async function streamParquet(filePath, data, options = {}) {
|
|
|
622
631
|
|
|
623
632
|
if (filePath?.startsWith('gs://')) {
|
|
624
633
|
// For GCS, write to buffer first, then upload
|
|
634
|
+
// @ts-ignore
|
|
625
635
|
const arrayBuffer = parquetWriteBuffer({ columnData });
|
|
626
636
|
const { bucket, file } = parseGCSUri(filePath);
|
|
627
|
-
|
|
628
|
-
const writeStream = storage.bucket(bucket).file(file).createWriteStream({
|
|
637
|
+
|
|
638
|
+
const writeStream = storage.bucket(bucket).file(file).createWriteStream({
|
|
629
639
|
gzip: gzip || true // Always gzip for GCS
|
|
630
640
|
});
|
|
631
|
-
|
|
641
|
+
|
|
632
642
|
return new Promise((resolve, reject) => {
|
|
633
643
|
writeStream.write(Buffer.from(arrayBuffer));
|
|
634
644
|
writeStream.end();
|
|
@@ -641,13 +651,14 @@ async function streamParquet(filePath, data, options = {}) {
|
|
|
641
651
|
if (gzip && !filePath.endsWith('.gz')) {
|
|
642
652
|
actualFilePath = filePath + '.gz';
|
|
643
653
|
}
|
|
644
|
-
|
|
654
|
+
|
|
645
655
|
if (gzip) {
|
|
646
656
|
// Write to buffer then gzip to disk
|
|
657
|
+
// @ts-ignore
|
|
647
658
|
const arrayBuffer = parquetWriteBuffer({ columnData });
|
|
648
659
|
const buffer = Buffer.from(arrayBuffer);
|
|
649
660
|
const gzippedBuffer = zlib.gzipSync(buffer);
|
|
650
|
-
|
|
661
|
+
|
|
651
662
|
return new Promise((resolve, reject) => {
|
|
652
663
|
fs.writeFile(actualFilePath, gzippedBuffer, (err) => {
|
|
653
664
|
if (err) reject(err);
|
|
@@ -974,7 +985,7 @@ function validEvent(row) {
|
|
|
974
985
|
if (!row.time) return false;
|
|
975
986
|
if (!row.device_id && !row.user_id) return false;
|
|
976
987
|
if (!row.insert_id) return false;
|
|
977
|
-
if (!row.source) return false;
|
|
988
|
+
// if (!row.source) return false;
|
|
978
989
|
if (typeof row.time !== 'string') return false;
|
|
979
990
|
return true;
|
|
980
991
|
}
|
|
@@ -1361,7 +1372,7 @@ function wrapFunc(obj, func, recursion = 0, parentKey = null, grandParentKey = n
|
|
|
1361
1372
|
// }
|
|
1362
1373
|
|
|
1363
1374
|
const chance = getChance();
|
|
1364
|
-
function odds(num) {
|
|
1375
|
+
function odds(num) {
|
|
1365
1376
|
return chance.bool({ likelihood: num });
|
|
1366
1377
|
}
|
|
1367
1378
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "make-mp-data",
|
|
3
|
-
"version": "2.0.23",
|
|
3
|
+
"version": "2.1.1",
|
|
4
4
|
"description": "builds all mixpanel primitives for a given project",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "index.js",
|
|
@@ -31,7 +31,8 @@
|
|
|
31
31
|
"test:quick": "node ./tests/benchmark/quick-test.mjs",
|
|
32
32
|
"exp:soup": "node ./tests/testSoup.mjs",
|
|
33
33
|
"func:local": "functions-framework --target=entry",
|
|
34
|
-
"func:deploy": "./scripts/deploy.sh"
|
|
34
|
+
"func:deploy": "./scripts/deploy.sh",
|
|
35
|
+
"textGen": "node ./lib/templates/textQuickTest.js"
|
|
35
36
|
},
|
|
36
37
|
"repository": {
|
|
37
38
|
"type": "git",
|
|
@@ -65,14 +66,15 @@
|
|
|
65
66
|
"ak-gemini": "^1.0.59",
|
|
66
67
|
"ak-tools": "^1.1.12",
|
|
67
68
|
"chance": "^1.1.11",
|
|
68
|
-
"chart.js": "^3.9.1",
|
|
69
|
-
"chartjs-node-canvas": "^4.1.6",
|
|
70
69
|
"dayjs": "^1.11.11",
|
|
71
70
|
"dotenv": "^16.4.5",
|
|
72
71
|
"google-auth-library": "^9.15.0",
|
|
73
72
|
"hyparquet-writer": "^0.6.1",
|
|
74
|
-
"mixpanel-import": "^
|
|
73
|
+
"mixpanel-import": "^3.0.0",
|
|
75
74
|
"p-limit": "^3.1.0",
|
|
75
|
+
"seedrandom": "^3.0.5",
|
|
76
|
+
"sentiment": "^5.0.2",
|
|
77
|
+
"tracery-grammar": "^2.8.4",
|
|
76
78
|
"yargs": "^17.7.2"
|
|
77
79
|
},
|
|
78
80
|
"devDependencies": {
|