make-mp-data 2.0.22 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dungeons/ai-chat-analytics-ed.js +274 -0
- package/dungeons/business.js +0 -1
- package/dungeons/complex.js +0 -1
- package/dungeons/experiments.js +0 -1
- package/dungeons/gaming.js +47 -14
- package/dungeons/media.js +5 -6
- package/dungeons/mil.js +296 -0
- package/dungeons/money2020-ed-also.js +277 -0
- package/dungeons/money2020-ed.js +579 -0
- package/dungeons/sanity.js +0 -1
- package/dungeons/scd.js +0 -1
- package/dungeons/simple.js +57 -18
- package/dungeons/student-teacher.js +0 -1
- package/dungeons/text-generation.js +706 -0
- package/dungeons/userAgent.js +1 -2
- package/entry.js +4 -0
- package/index.js +63 -38
- package/lib/cli/cli.js +7 -8
- package/lib/core/config-validator.js +11 -13
- package/lib/core/context.js +13 -1
- package/lib/core/storage.js +45 -13
- package/lib/generators/adspend.js +1 -1
- package/lib/generators/events.js +18 -17
- package/lib/generators/funnels.js +293 -240
- package/lib/generators/text-bak-old.js +1121 -0
- package/lib/generators/text.js +1173 -0
- package/lib/orchestrators/mixpanel-sender.js +1 -1
- package/lib/templates/abbreviated.d.ts +13 -3
- package/lib/templates/defaults.js +311 -169
- package/lib/templates/hooks-instructions.txt +434 -0
- package/lib/templates/phrases-bak.js +925 -0
- package/lib/templates/phrases.js +2066 -0
- package/lib/templates/{instructions.txt → schema-instructions.txt} +78 -1
- package/lib/templates/scratch-dungeon-template.js +1 -1
- package/lib/templates/textQuickTest.js +172 -0
- package/lib/utils/ai.js +51 -2
- package/lib/utils/utils.js +145 -7
- package/package.json +8 -5
- package/types.d.ts +322 -7
- package/lib/utils/chart.js +0 -206
|
@@ -0,0 +1,1121 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Realistic Text Generation Module
|
|
3
|
+
* Generates authentic-feeling unstructured text with natural language patterns
|
|
4
|
+
* @module text
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/* cSpell:disable */
|
|
8
|
+
|
|
9
|
+
import tracery from 'tracery-grammar';
|
|
10
|
+
import seedrandom from 'seedrandom';
|
|
11
|
+
import crypto from 'crypto';
|
|
12
|
+
import SentimentPkg from 'sentiment';
|
|
13
|
+
import {
|
|
14
|
+
PHRASE_BANK,
|
|
15
|
+
FORMALITY_MODIFIERS,
|
|
16
|
+
INTENSITY_MODIFIERS,
|
|
17
|
+
STYLE_MODIFIERS,
|
|
18
|
+
USER_PERSONAS,
|
|
19
|
+
DISCOURSE_MARKERS,
|
|
20
|
+
TYPO_PATTERNS
|
|
21
|
+
} from '../templates/phrases.js';
|
|
22
|
+
const {NODE_ENV = "unknown"} = process.env;
|
|
23
|
+
|
|
24
|
+
const Sentiment = typeof SentimentPkg === 'function' ? SentimentPkg : SentimentPkg.default;
|
|
25
|
+
const sentiment = new Sentiment();
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* @typedef {import('../../types.js').TextTone} TextTone
|
|
29
|
+
* @typedef {import('../../types.js').TextStyle} TextStyle
|
|
30
|
+
* @typedef {import('../../types.js').TextIntensity} TextIntensity
|
|
31
|
+
* @typedef {import('../../types.js').TextFormality} TextFormality
|
|
32
|
+
* @typedef {import('../../types.js').TextReturnType} TextReturnType
|
|
33
|
+
* @typedef {import('../../types.js').TextKeywordSet} TextKeywordSet
|
|
34
|
+
* @typedef {import('../../types.js').TextGeneratorConfig} TextGeneratorConfig
|
|
35
|
+
* @typedef {import('../../types.js').TextMetadata} TextMetadata
|
|
36
|
+
* @typedef {import('../../types.js').SimpleGeneratedText} SimpleGeneratedText
|
|
37
|
+
* @typedef {import('../../types.js').GeneratedText} GeneratedText
|
|
38
|
+
* @typedef {import('../../types.js').TextBatchOptions} TextBatchOptions
|
|
39
|
+
* @typedef {import('../../types.js').TextGeneratorStats} TextGeneratorStats
|
|
40
|
+
* @typedef {import('../../types.js').TextGenerator} TextGenerator
|
|
41
|
+
*/
|
|
42
|
+
|
|
43
|
+
// ============= Core Utilities =============
|
|
44
|
+
|
|
45
|
+
/**
 * Cheap 32-bit string hash used for duplicate detection.
 *
 * The input is lowercased, stripped of every character other than a-z, 0-9
 * and whitespace, and has whitespace runs collapsed to a single space before
 * hashing. Texts that differ only in case, punctuation or spacing therefore
 * produce the same hash.
 *
 * @private
 * @param {string} text - Text to hash
 * @returns {number} Signed 32-bit integer (0 when nothing survives normalization)
 */
function fastHash(text) {
  const canonical = text
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, '')
    .replace(/\s+/g, ' ')
    .trim();

  let acc = 0;
  for (let pos = 0; pos < canonical.length; pos += 1) {
    // acc * 31 + charCode, wrapped to a signed 32-bit integer on every step
    acc = (Math.imul(acc, 31) + canonical.charCodeAt(pos)) | 0;
  }
  return acc;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Bounded FIFO cache of recently generated texts, used to reject recent
 * duplicates quickly.
 *
 * A text counts as "seen" when either the exact string or its normalized
 * `fastHash` value is currently cached. Once `maxSize` entries are stored the
 * oldest entry is evicted to make room for a new one.
 *
 * @private
 */
class RecentTextCache {
  /**
   * @param {number} [size=100] - Maximum number of texts to remember.
   *   A non-positive (or non-numeric) size disables caching entirely.
   */
  constructor(size = 100) {
    this.maxSize = size;
    this.texts = new Set();
    this.hashes = new Set();
    this.queue = [];
    // hash -> number of queued texts that currently share it, so evicting one
    // text does not forget a hash another cached text still relies on
    this.hashCounts = new Map();
  }

  /**
   * @param {string} text
   * @returns {boolean} true when the text (or a normalized equivalent) is cached
   */
  has(text) {
    return this.hashes.has(fastHash(text)) || this.texts.has(text);
  }

  /**
   * Remember a text, evicting the oldest entries when at capacity.
   * Adding a text that is already cached verbatim is a no-op.
   * @param {string} text
   */
  add(text) {
    // Nothing can be stored; previously this path crashed on an empty queue
    if (!(this.maxSize > 0)) return;

    // Avoid duplicate queue entries: evicting the first copy would otherwise
    // remove the text from the sets while a second copy is still queued
    if (this.texts.has(text)) return;

    while (this.queue.length >= this.maxSize) {
      this.evictOldest();
    }

    const hash = fastHash(text);
    this.queue.push(text);
    this.texts.add(text);
    this.hashes.add(hash);
    this.hashCounts.set(hash, (this.hashCounts.get(hash) || 0) + 1);
  }

  /**
   * Drop the oldest cached text and release its hash if no other cached
   * text shares it.
   * @private
   */
  evictOldest() {
    if (this.queue.length === 0) return;

    const oldest = this.queue.shift();
    this.texts.delete(oldest);

    const hash = fastHash(oldest);
    const remaining = (this.hashCounts.get(hash) || 0) - 1;
    if (remaining > 0) {
      this.hashCounts.set(hash, remaining);
    } else {
      this.hashCounts.delete(hash);
      this.hashes.delete(hash);
    }
  }

  /**
   * @returns {number} Number of texts currently cached
   */
  size() {
    return this.queue.length;
  }
}
|
|
98
|
+
|
|
99
|
+
/**
 * Small deterministic pseudo-random generator used to diversify output.
 *
 * Each call to `next()` derives a value from the base seed and a call
 * counter using the linear-congruential constants 9301 / 49297 / 233280.
 *
 * @private
 */
class DiversityRNG {
  /**
   * @param {number} [baseSeed] - Seed for the sequence; defaults to a random
   *   integer below 1,000,000. Non-finite seeds are treated as 0.
   */
  constructor(baseSeed = Math.floor(Math.random() * 1000000)) {
    this.baseSeed = baseSeed;
    this.counter = 0;
  }

  /**
   * Advance the generator.
   * @returns {number} Value in [0, 1)
   */
  next() {
    const MODULUS = 233280;
    const MULTIPLIER = 9301;
    const INCREMENT = 49297;

    this.counter++;

    // Reduce the seed into [0, MODULUS) first. The result is congruent to the
    // unreduced formula, but it keeps every intermediate product far below
    // 2^53 (large seeds such as Date.now() used to lose their low bits) and
    // guarantees a non-negative, non-NaN output for negative or invalid seeds.
    const numericSeed = Number(this.baseSeed);
    const finiteSeed = Number.isFinite(numericSeed) ? numericSeed : 0;
    const base = ((finiteSeed % MODULUS) + MODULUS) % MODULUS;

    let state = (base + (this.counter % MODULUS) * MULTIPLIER) % MODULUS;
    state = (state * MULTIPLIER + INCREMENT) % MODULUS;
    return state / MODULUS;
  }

  /**
   * Run `fn` with the global `Math.random` temporarily replaced by this
   * generator. The original `Math.random` is restored as soon as `fn`
   * returns or throws, so work that an async `fn` performs after its first
   * `await` will not see the override.
   *
   * @param {Function} fn - Callback to run
   * @returns {*} Whatever `fn` returns
   */
  withDiversity(fn) {
    const originalRandom = Math.random;
    Math.random = () => this.next();

    try {
      return fn();
    } finally {
      Math.random = originalRandom;
    }
  }
}
|
|
128
|
+
|
|
129
|
+
/**
 * Build a compact signature of a text from its leading and trailing words.
 *
 * The text is lowercased and every character other than a-z, 0-9 and
 * whitespace is treated as a separator. Texts of three words or fewer are
 * returned as all their words joined by "|"; longer texts yield
 * "<first three>...<last three>".
 *
 * @private
 * @param {string} text
 * @returns {string} Word-pattern signature
 */
function getWordPattern(text) {
  const tokens = text
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/)
    .filter((token) => token.length > 0);

  if (tokens.length > 3) {
    const head = tokens.slice(0, 3);
    const tail = tokens.slice(tokens.length - 3);
    return `${head.join('|')}...${tail.join('|')}`;
  }

  return tokens.join('|');
}
|
|
146
|
+
|
|
147
|
+
/**
 * Jaccard similarity of the two texts' lowercase, whitespace-separated
 * word sets.
 *
 * Empty tokens produced by leading/trailing whitespace are ignored so that
 * padding alone never lowers the score. Two texts without any words are
 * considered identical.
 *
 * @private
 * @param {string} text1
 * @param {string} text2
 * @returns {number} Similarity in the range 0-1
 */
function calculateSimilarity(text1, text2) {
  const toWordSet = (text) => new Set(text.toLowerCase().split(/\s+/).filter(Boolean));

  const words1 = toWordSet(text1);
  const words2 = toWordSet(text2);

  // Avoid 0/0 and keep the historical result for two blank inputs
  if (words1.size === 0 && words2.size === 0) return 1;

  let shared = 0;
  for (const word of words1) {
    if (words2.has(word)) shared++;
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  return shared / (words1.size + words2.size - shared);
}
|
|
160
|
+
|
|
161
|
+
/**
 * Roll against a probability.
 *
 * @private
 * @param {number} probability - Likelihood of success, compared against a
 *   single `Math.random()` draw
 * @returns {boolean} true when the draw is strictly below `probability`
 */
function chance(probability) {
  const roll = Math.random();
  return roll < probability;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Pick one item at random, with probability proportional to its weight.
 *
 * Items whose weight is zero (or not a positive number) are never selected
 * unless no item has a positive weight, in which case the first item is
 * returned.
 *
 * @private
 * @param {Array} items - Candidates
 * @param {number[]} weights - Weight for the item at the same index
 * @returns {*} The chosen item (`undefined` when `items` is empty)
 */
function weightedRandom(items, weights) {
  const total = weights.reduce((sum, weight) => (weight > 0 ? sum + weight : sum), 0);
  let remaining = Math.random() * total;
  let lastWeighted = -1;

  for (let i = 0; i < items.length; i++) {
    const weight = weights[i];
    if (!(weight > 0)) continue;

    lastWeighted = i;
    // Strict comparison: a roll of exactly 0 must not select a zero-weight item
    if (remaining < weight) return items[i];
    remaining -= weight;
  }

  // Reached only through floating-point rounding or when nothing is weighted
  return lastWeighted >= 0 ? items[lastWeighted] : items[0];
}
|
|
182
|
+
|
|
183
|
+
// ============= Keyword Injection System =============
|
|
184
|
+
|
|
185
|
+
/**
 * Weaves caller-supplied keywords into generated text and remembers which
 * keywords actually ended up in the output.
 */
class KeywordInjector {
  /**
   * @param {Object} [keywords] - Keyword lists keyed by category name
   *   (presumably the shape of `TextKeywordSet`; each value is indexed like an array)
   */
  constructor(keywords = {}) {
    this.keywords = keywords;
    this.injectedKeywords = [];
  }

  /**
   * Inject keywords naturally into text.
   *
   * The text is split into sentences; each sentence gets a keyword with a
   * probability that starts at `density` and shrinks for later sentences
   * (the index advances by two per sentence, so the factor drops by 0.2
   * each time and is never positive from the sixth sentence on).
   *
   * @param {string} text - Text to enrich
   * @param {number} [density=0.15] - Base injection probability per sentence
   * @returns {string} Text with keywords inserted (unchanged when no keywords are configured)
   */
  inject(text, density = 0.15) {
    if (!this.keywords || Object.keys(this.keywords).length === 0) return text;

    this.injectedKeywords = [];
    // The capture group keeps the punctuation, so even indices are sentences
    // and odd indices are the separators that followed them
    const sentences = text.split(/([.!?]+\s*)/);
    const result = [];

    for (let i = 0; i < sentences.length; i += 2) {
      let sentence = sentences[i];
      const punctuation = sentences[i + 1] || '';

      if (sentence && chance(density * (1 - i * 0.1))) {
        sentence = this.injectIntoSentence(sentence);
      }

      result.push(sentence + punctuation);
    }

    return result.join('');
  }

  /**
   * Insert one randomly chosen keyword into a sentence.
   *
   * Known phrases ("the product", "slow", "version", ...) are replaced or
   * annotated first; otherwise the keyword is occasionally spliced in before
   * the last word. The keyword is recorded as injected only when the
   * sentence was actually changed.
   *
   * @param {string} sentence
   * @returns {string} The sentence, possibly containing the keyword
   */
  injectIntoSentence(sentence) {
    const categories = Object.keys(this.keywords).filter(
      (cat) => this.keywords[cat] && this.keywords[cat].length > 0
    );

    if (categories.length === 0) return sentence;

    const category = categories[Math.floor(Math.random() * categories.length)];
    const keyword = this.keywords[category][
      Math.floor(Math.random() * this.keywords[category].length)
    ];

    if (!keyword) return sentence;

    // Replacements are functions rather than template strings so that "$1",
    // "$&" etc. inside a keyword are inserted literally instead of being
    // interpreted as String.prototype.replace patterns.
    const patterns = [
      { regex: /\b(the feature|this feature|that feature)\b/i, render: () => keyword },
      { regex: /\b(the product|the app|the tool|the system)\b/i, render: () => keyword },
      { regex: /\b(especially|particularly|specifically)\b/i, render: (match) => `${match} ${keyword}` },
      { regex: /\b(broken|working|fast|slow|buggy)\b/i, render: (match) => `${match} (${keyword})` },
      { regex: /\b(version|release|update)\b/i, render: (match) => `${match} (${keyword})` },
    ];

    // Add style-specific patterns if available
    if (chance(0.2) && STYLE_MODIFIERS) {
      if (STYLE_MODIFIERS.support && sentence.includes('error')) {
        patterns.push({ regex: /\berror\b/i, render: () => `${keyword} error` });
      }
      if (STYLE_MODIFIERS.review && sentence.includes('good')) {
        patterns.push({ regex: /\bgood\b/i, render: () => `good (${keyword})` });
      }
    }

    for (const pattern of patterns) {
      if (pattern.regex.test(sentence)) {
        this.injectedKeywords.push(keyword);
        return sentence.replace(pattern.regex, (match) => pattern.render(match));
      }
    }

    // Fallback: natural insertion before the last word
    if (chance(0.3)) {
      const insertions = [
        `. Specifically ${keyword}.`,
        ` - I mean ${keyword} -`,
        ` (talking about ${keyword})`,
        `, especially ${keyword},`,
      ];
      const insertion = insertions[Math.floor(Math.random() * insertions.length)];
      const insertPoint = sentence.lastIndexOf(' ');
      if (insertPoint > 0) {
        this.injectedKeywords.push(keyword);
        return sentence.slice(0, insertPoint) + insertion + sentence.slice(insertPoint);
      }
    }

    return sentence;
  }

  /**
   * @returns {Array} De-duplicated keywords inserted by the most recent `inject()` call
   *   (plus any direct `injectIntoSentence()` calls since)
   */
  getInjectedKeywords() {
    return [...new Set(this.injectedKeywords)];
  }
}
|
|
278
|
+
|
|
279
|
+
// ============= Authenticity Enhancement =============
|
|
280
|
+
|
|
281
|
+
/**
 * Adds human-sounding imperfections (self-corrections, trail-offs, personal
 * context, emotional markers, ...) to generated text. Every effect is applied
 * probabilistically, scaled by the configured level.
 */
class AuthenticityEnhancer {
  /**
   * @param {number} [level=0.3] - Multiplier applied to each effect's base probability
   */
  constructor(level = 0.3) {
    this.level = level;
  }

  /**
   * Run the text through every enhancement step, each gated by its own roll.
   *
   * @param {string} text
   * @param {number} [sentimentScore=0] - Sentiment score forwarded to the emotional-marker step
   * @param {string} [formality='casual'] - Formality forwarded to the formality step
   * @returns {string} Enhanced text
   */
  enhance(text, sentimentScore = 0, formality = 'casual') {
    // [probability weight, transformation]; order matters because each step
    // operates on the output of the previous one
    const pipeline = [
      [0.4, (value) => this.addSelfCorrection(value)],
      [0.3, (value) => this.addTrailOff(value)],
      [0.3, (value) => this.addSpecificDetail(value)],
      [0.2, (value) => this.addPersonalContext(value)],
      [0.2, (value) => this.addEmotionalMarker(value, sentimentScore, 'medium')],
      [0.1, (value) => this.addMetaCommentary(value)],
      [0.15, (value) => this.addFormalityMarkers(value, formality)],
    ];

    let output = text;
    for (const [weight, transform] of pipeline) {
      if (chance(this.level * weight)) {
        output = transform(output);
      }
    }
    return output;
  }

  /**
   * For casual text, occasionally contract "can not"/"will not"/"do not" and
   * swap the first "going to" for a slang term from FORMALITY_MODIFIERS.
   */
  addFormalityMarkers(text, formality) {
    if (formality !== 'casual') return text;
    if (!FORMALITY_MODIFIERS.casual) return text;

    let output = text;

    if (chance(0.3) && FORMALITY_MODIFIERS.casual.contractions) {
      output = output
        .replace(/\b(can not|cannot)\b/gi, "can't")
        .replace(/\b(will not)\b/gi, "won't")
        .replace(/\b(do not)\b/gi, "don't");
    }

    if (chance(0.2) && FORMALITY_MODIFIERS.casual.slang) {
      const slangTerms = FORMALITY_MODIFIERS.casual.slang;
      const pick = slangTerms[Math.floor(Math.random() * slangTerms.length)];
      output = output.replace(/\b(going to)\b/i, pick);
    }

    return output;
  }

  /**
   * Pick one adjective rule at random and, if that adjective occurs, rewrite
   * its first occurrence as a mid-sentence correction.
   */
  addSelfCorrection(text) {
    const rules = [
      [/\bgood\b/, 'good... well, decent'],
      [/\bbad\b/, 'bad... terrible actually'],
      [/\bfast\b/, 'fast (relatively speaking)'],
      [/\bslow\b/, 'slow... painfully slow'],
    ];

    const [needle, rewrite] = rules[Math.floor(Math.random() * rules.length)];
    return text.replace(needle, rewrite);
  }

  /**
   * Replace a final period with a trailing-off phrase.
   */
  addTrailOff(text) {
    if (!text.endsWith('.')) return text;

    const endings = [
      '...', '... whatever', '... I guess', '... you know?',
      '... if that makes sense', '... or something',
    ];
    const ending = endings[Math.floor(Math.random() * endings.length)];
    return text.slice(0, -1) + ending;
  }

  /**
   * Annotate the first recognised complaint/praise word with a concrete figure.
   * Only the first rule that matches is applied.
   */
  addSpecificDetail(text) {
    const rules = [
      { find: /\berror\b/i, replace: 'error (404 specifically)' },
      { find: /\bcrashes\b/i, replace: 'crashes (3-4 times daily)' },
      { find: /\bslow\b/i, replace: 'slow (30+ seconds)' },
      { find: /\bfast\b/i, replace: 'fast (under 100ms)' },
      { find: /\bexpensive\b/i, replace: 'expensive ($299/month)' },
    ];

    const hit = rules.find((rule) => rule.find.test(text));
    return hit ? text.replace(hit.find, hit.replace) : text;
  }

  /**
   * Half of the time, prefix the text with a situational lead-in and
   * lowercase its first character.
   */
  addPersonalContext(text) {
    if (!chance(0.5)) return text;

    const leadIns = [
      'At my startup, ', 'In our enterprise setup, ', 'During the migration, ',
      'After the latest update, ', 'Since going remote, ', 'In production, ',
    ];
    const leadIn = leadIns[Math.floor(Math.random() * leadIns.length)];
    return leadIn + text.charAt(0).toLowerCase() + text.slice(1);
  }

  /**
   * For strongly scored text (|score| > 5), append an emoji/punctuation
   * marker and occasionally insert an amplifier after "is"/"was"/"feel(s)".
   */
  addEmotionalMarker(text, sentimentScore, intensity = 'medium') {
    const isStrong = Math.abs(sentimentScore) > 5;
    if (!isStrong) return text;

    let markers;
    if (sentimentScore > 0) {
      markers = ['!!!', ' 🎉', ' 💯', ' 🚀'];
    } else {
      markers = ['...', ' 😤', ' 🤦', ' ugh'];
    }

    let output = text + markers[Math.floor(Math.random() * markers.length)];

    if (chance(0.3) && INTENSITY_MODIFIERS[intensity]) {
      const boosters = INTENSITY_MODIFIERS[intensity].amplifiers;
      if (boosters) {
        const booster = boosters[Math.floor(Math.random() * boosters.length)];
        output = output.replace(/\b(is|was|feels?)\s+(\w+)/i, `$1 ${booster} $2`);
      }
    }

    return output;
  }

  /**
   * Occasionally (30%) prefix the text with a forum-style tag such as "EDIT: ".
   */
  addMetaCommentary(text) {
    if (!chance(0.3)) return text;

    const tags = ['EDIT: ', 'UPDATE: ', 'Note: ', 'BTW: ', 'PS: '];
    return tags[Math.floor(Math.random() * tags.length)] + text;
  }
}
|
|
406
|
+
|
|
407
|
+
// ============= Smart Typo Generator =============
|
|
408
|
+
|
|
409
|
+
/**
 * Introduces typos into text. Typo rules come in three sets ("emotional",
 * "mobile", "technical"); which set is used depends on the text's sentiment
 * score and style, and typos tend to cluster once one has occurred.
 */
class SmartTypoGenerator {
  /**
   * Builds the rule sets from the imported TYPO_PATTERNS array when it is
   * available, otherwise from built-in defaults. In one out of ten
   * constructions a few extra common misspellings are appended.
   */
  constructor() {
    if (Array.isArray(TYPO_PATTERNS)) {
      // Imported entries use { pattern, replacements }; rules use { from, to }
      const imported = TYPO_PATTERNS.map((entry) => {
        return { from: entry.pattern, to: entry.replacements };
      });

      this.patterns = {
        emotional: imported,
        mobile: imported.slice(0, 3), // first three rules only
        technical: imported.slice(0, 2), // first two rules only
      };
    } else {
      this.patterns = SmartTypoGenerator.builtInPatterns();
    }

    if (chance(0.1)) {
      this.addCommonTypos();
    }
  }

  /**
   * Default rule sets used when TYPO_PATTERNS is not an array.
   * @private
   */
  static builtInPatterns() {
    const emotional = [
      { from: /\bthe\b/g, to: ['teh', 'hte', 'th'] },
      { from: /\byou\b/g, to: ['u', 'yuo', 'yu'] },
      { from: /\band\b/g, to: ['adn', 'an', 'nad'] },
      { from: /ing\b/g, to: ['ign', 'img', 'ing'] },
      { from: /tion\b/g, to: ['toin', 'tion', 'iton'] },
    ];
    const mobile = [
      { from: /\s+/g, to: [''] }, // dropped spaces
      { from: /([a-z])\1/g, to: ['$1'] }, // doubled letter typed once
    ];
    const technical = [
      { from: /\(\)/g, to: ['(', ')'] },
      { from: /\;/g, to: [':'] },
      { from: /\=/g, to: ['==', '='] },
    ];
    return { emotional, mobile, technical };
  }

  /**
   * Append three extra misspelling rules (their/your/would) to the
   * "emotional" set.
   */
  addCommonTypos() {
    const extras = [
      { from: /\btheir\b/g, to: ['thier', 'there', 'they\'re'] },
      { from: /\byour\b/g, to: ['you\'re', 'ur', 'yur'] },
      { from: /\bwould\b/g, to: ['woudl', 'wolud', 'wuold'] },
    ];
    this.patterns.emotional.push(...extras);
  }

  /**
   * Apply typos to a text.
   *
   * @param {string} text
   * @param {number} [rate=0.02] - Base per-token typo probability
   * @param {string} [style='general'] - 'chat' selects the mobile rules and
   *   'support' the technical ones; anything else uses the emotional rules
   * @param {number} [sentimentScore=0] - Larger magnitudes raise the rate;
   *   above 5 the emotional rules are always used
   * @returns {string} Text with typos applied
   */
  apply(text, rate = 0.02, style = 'general', sentimentScore = 0) {
    const magnitude = Math.abs(sentimentScore);
    const effectiveRate = rate * (1 + magnitude / 20);

    let setName = 'emotional';
    if (!(magnitude > 5)) {
      if (style === 'chat') {
        setName = 'mobile';
      } else if (style === 'support') {
        setName = 'technical';
      }
    }
    const rules = this.patterns[setName];

    // Capturing split keeps the whitespace runs as their own tokens so the
    // text can be reassembled with a plain join
    const tokens = text.split(/(\s+)/);
    let momentum = 0;

    tokens.forEach((token, index) => {
      if (!chance(effectiveRate * (1 + momentum))) {
        momentum *= 0.7; // decay towards the base rate
        return;
      }

      const rule = rules.find((candidate) => candidate.from.test(token));
      if (rule) {
        const swap = rule.to[Math.floor(Math.random() * rule.to.length)];
        tokens[index] = token.replace(rule.from, swap);
        momentum = 0.5; // a typo makes the next one more likely
      }
    });

    return tokens.join('');
  }
}
|
|
495
|
+
|
|
496
|
+
// ============= Mixed Sentiment Generator =============
|
|
497
|
+
|
|
498
|
+
/**
 * Builds multi-clause text whose sentiment is not uniform: a primary clause,
 * optionally followed by a contrasting view, a neutral observation and a
 * return to the primary sentiment.
 */
class MixedSentimentGenerator {
  /**
   * @param {Object} grammar - Grammar exposing `flatten(rule)` (a Tracery grammar in this module)
   * @param {string} primaryTone - 'pos', 'neg' or 'neu'
   * @param {string} [intensity='medium'] - Intensity used for the primary clauses
   * @returns {string} Clauses joined with '. '
   */
  generateNuanced(grammar, primaryTone, intensity = 'medium') {
    const parts = [];

    // Start with primary sentiment
    parts.push(this.generateClause(grammar, primaryTone, intensity));

    // Add contrasting view (30% chance)
    if (chance(0.3)) {
      const counterTone = primaryTone === 'pos' ? 'neg' :
        primaryTone === 'neg' ? 'pos' : 'neu';
      parts.push(`That said, ${this.generateClause(grammar, counterTone, 'low').toLowerCase()}`);
    }

    // Add neutral observation (20% chance)
    if (chance(0.2)) {
      parts.push(this.generateClause(grammar, 'neu', 'low'));
    }

    // Return to primary sentiment, but only if something was added in between
    if (parts.length > 1 && chance(0.5)) {
      parts.push(`Overall though, ${this.generateClause(grammar, primaryTone, intensity).toLowerCase()}`);
    }

    return parts.join('. ');
  }

  /**
   * Expand the `origin_<tone>_<intensity>` rule, falling back to
   * `origin_<tone>` when the specific rule is unavailable.
   *
   * @param {Object} grammar - Grammar exposing `flatten(rule)`
   * @param {string} tone
   * @param {string} intensity
   * @returns {string} Generated clause
   */
  generateClause(grammar, tone, intensity) {
    const specific = `origin_${tone}_${intensity}`;
    const generic = `origin_${tone}`;

    try {
      const clause = grammar.flatten(`#${specific}#`);
      // Tracery does not throw for an unknown symbol; it echoes the symbol
      // back as "((symbol))". Treat that marker as "rule missing" so the
      // generic rule is actually used.
      if (!String(clause).includes(`((${specific}))`)) {
        return clause;
      }
    } catch {
      // Best effort by design: any failure of the specific rule falls
      // through to the generic one below.
    }

    return grammar.flatten(`#${generic}#`);
  }
}
|
|
536
|
+
|
|
537
|
+
// ============= Main Generator Class =============
|
|
538
|
+
|
|
539
|
+
/**
|
|
540
|
+
* Text generator with realistic language patterns
|
|
541
|
+
*/
|
|
542
|
+
class RealisticTextGenerator {
|
|
543
|
+
/**
|
|
544
|
+
* Create a new generator instance
|
|
545
|
+
* @param {TextGeneratorConfig} config - Generator configuration
|
|
546
|
+
*/
|
|
547
|
+
constructor(config = {}) {
|
|
548
|
+
// Apply defaults
|
|
549
|
+
this.config = {
|
|
550
|
+
tone: 'neu',
|
|
551
|
+
style: 'feedback',
|
|
552
|
+
intensity: 'medium',
|
|
553
|
+
formality: 'casual',
|
|
554
|
+
min: 100,
|
|
555
|
+
max: 500,
|
|
556
|
+
keywordDensity: 0.15,
|
|
557
|
+
typos: false,
|
|
558
|
+
typoRate: 0.02,
|
|
559
|
+
mixedSentiment: true,
|
|
560
|
+
authenticityLevel: 0.3,
|
|
561
|
+
timestamps: false,
|
|
562
|
+
userPersona: false,
|
|
563
|
+
sentimentDrift: 0.2,
|
|
564
|
+
includeMetadata: true,
|
|
565
|
+
specificityLevel: 0.5,
|
|
566
|
+
enableDeduplication: true,
|
|
567
|
+
maxAttempts: 50,
|
|
568
|
+
// performanceMode removed - we're always optimized now
|
|
569
|
+
...config
|
|
570
|
+
};
|
|
571
|
+
|
|
572
|
+
// System is now always optimized for speed + uniqueness
|
|
573
|
+
|
|
574
|
+
// Always optimize for speed but maintain uniqueness
|
|
575
|
+
this.config.maxAttempts = Math.min(this.config.maxAttempts, 25); // Reasonable limit
|
|
576
|
+
|
|
577
|
+
// Initialize RNG if seed provided
|
|
578
|
+
if (this.config.seed) {
|
|
579
|
+
seedrandom(this.config.seed, { global: true });
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
// Initialize components
|
|
583
|
+
this.grammar = this.createGrammar();
|
|
584
|
+
this.keywordInjector = new KeywordInjector(this.config.keywords);
|
|
585
|
+
this.authenticityEnhancer = new AuthenticityEnhancer(this.config.authenticityLevel);
|
|
586
|
+
this.typoGenerator = new SmartTypoGenerator();
|
|
587
|
+
this.mixedSentimentGen = new MixedSentimentGenerator();
|
|
588
|
+
|
|
589
|
+
// Initialize diversity and caching systems
|
|
590
|
+
this.recentCache = new RecentTextCache(100); // Fast recent duplicate detection
|
|
591
|
+
const seedValue = typeof this.config.seed === 'string' ? parseInt(this.config.seed) : (this.config.seed || Date.now());
|
|
592
|
+
this.diversityRNG = new DiversityRNG(seedValue);
|
|
593
|
+
|
|
594
|
+
// Legacy deduplication tracking (kept for compatibility)
|
|
595
|
+
this.generatedHashes = new Set();
|
|
596
|
+
this.recentTexts = [];
|
|
597
|
+
this.maxRecentTexts = 100; // Limit memory usage and comparison time
|
|
598
|
+
this.currentTone = this.config.tone;
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
/**
|
|
602
|
+
* Create Tracery grammar from phrase bank
|
|
603
|
+
* @private
|
|
604
|
+
*/
|
|
605
|
+
createGrammar() {
|
|
606
|
+
const g = tracery.createGrammar(PHRASE_BANK);
|
|
607
|
+
g.addModifiers(tracery.baseEngModifiers);
|
|
608
|
+
return g;
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
/**
|
|
612
|
+
* Generate a single text item
|
|
613
|
+
* @param {string} [tone] - Override tone for this generation
|
|
614
|
+
* @returns {GeneratedText|string|null} Generated text or null if failed
|
|
615
|
+
*/
|
|
616
|
+
generateOne(tone = this.currentTone) {
|
|
617
|
+
for (let attempt = 0; attempt < this.config.maxAttempts; attempt++) {
|
|
618
|
+
// Allow sentiment drift
|
|
619
|
+
if (this.config.sentimentDrift > 0 && chance(this.config.sentimentDrift)) {
|
|
620
|
+
this.currentTone = this.driftTone(this.currentTone);
|
|
621
|
+
tone = this.currentTone;
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
// Generate base text
|
|
625
|
+
let text = this.generateBaseText(tone);
|
|
626
|
+
if (!text) continue;
|
|
627
|
+
|
|
628
|
+
// Apply enhancements
|
|
629
|
+
text = this.applyEnhancements(text, tone);
|
|
630
|
+
|
|
631
|
+
// Check constraints
|
|
632
|
+
if (!this.meetsConstraints(text, tone)) continue;
|
|
633
|
+
|
|
634
|
+
// Check for duplicates
|
|
635
|
+
if (this.config.enableDeduplication && this.isDuplicate(text)) continue;
|
|
636
|
+
|
|
637
|
+
// Add to recent cache for fast future duplicate detection
|
|
638
|
+
this.recentCache.add(text);
|
|
639
|
+
|
|
640
|
+
// Legacy deduplication tracking handled elsewhere if enabled
|
|
641
|
+
|
|
642
|
+
// Return based on metadata preference
|
|
643
|
+
if (this.config.includeMetadata) {
|
|
644
|
+
return this.createTextObject(text, tone);
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
return text;
|
|
648
|
+
}
|
|
649
|
+
|
|
650
|
+
return null;
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
/**
|
|
654
|
+
* Generate base text from grammar
|
|
655
|
+
* @private
|
|
656
|
+
*/
|
|
657
|
+
generateBaseText(tone) {
|
|
658
|
+
// Use diversity RNG for varied generation
|
|
659
|
+
return this.diversityRNG.withDiversity(() => {
|
|
660
|
+
// Use mixed sentiment sometimes
|
|
661
|
+
if (this.config.mixedSentiment && chance(0.3)) {
|
|
662
|
+
return this.mixedSentimentGen.generateNuanced(
|
|
663
|
+
this.grammar,
|
|
664
|
+
tone,
|
|
665
|
+
this.config.intensity
|
|
666
|
+
);
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
// Generate from appropriate origin pattern
|
|
670
|
+
const styleKey = `#origin_${this.config.style}_${tone}#`;
|
|
671
|
+
const fallbackKey = `#origin_${tone}#`;
|
|
672
|
+
|
|
673
|
+
try {
|
|
674
|
+
return this.cleanText(this.grammar.flatten(styleKey));
|
|
675
|
+
} catch {
|
|
676
|
+
return this.cleanText(this.grammar.flatten(fallbackKey));
|
|
677
|
+
}
|
|
678
|
+
});
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
/**
|
|
682
|
+
* Apply all enhancement layers
|
|
683
|
+
* @private
|
|
684
|
+
*/
|
|
685
|
+
applyEnhancements(text, tone) {
|
|
686
|
+
const sentScore = sentiment.analyze(text).score;
|
|
687
|
+
|
|
688
|
+
// 1. Keyword injection
|
|
689
|
+
if (this.config.keywords) {
|
|
690
|
+
text = this.keywordInjector.inject(text, this.config.keywordDensity);
|
|
691
|
+
}
|
|
692
|
+
|
|
693
|
+
// 2. Authenticity markers
|
|
694
|
+
text = this.authenticityEnhancer.enhance(text, sentScore, this.config.formality);
|
|
695
|
+
|
|
696
|
+
// 3. Length adjustment
|
|
697
|
+
text = this.adjustLength(text, tone);
|
|
698
|
+
|
|
699
|
+
// 4. Smart typos
|
|
700
|
+
if (this.config.typos) {
|
|
701
|
+
text = this.typoGenerator.apply(
|
|
702
|
+
text,
|
|
703
|
+
this.config.typoRate,
|
|
704
|
+
this.config.style,
|
|
705
|
+
sentScore
|
|
706
|
+
);
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
return text;
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
/**
|
|
713
|
+
* Adjust text to meet length requirements
|
|
714
|
+
* @private
|
|
715
|
+
*/
|
|
716
|
+
adjustLength(text, tone) {
|
|
717
|
+
const currentLength = text.length;
|
|
718
|
+
|
|
719
|
+
if (currentLength < this.config.min) {
|
|
720
|
+
// Add more content
|
|
721
|
+
const additions = [
|
|
722
|
+
this.grammar.flatten(`#clause_${tone}#`),
|
|
723
|
+
this.grammar.flatten(`#react_${tone}#`),
|
|
724
|
+
this.grammar.flatten(`#obs_${tone}#`)
|
|
725
|
+
];
|
|
726
|
+
|
|
727
|
+
while (text.length < this.config.min) {
|
|
728
|
+
const addition = additions[Math.floor(Math.random() * additions.length)];
|
|
729
|
+
text += '. ' + this.cleanText(addition);
|
|
730
|
+
|
|
731
|
+
if (text.length > this.config.min * 1.2) break;
|
|
732
|
+
}
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
if (text.length > this.config.max) {
|
|
736
|
+
// Smart truncation at sentence boundary
|
|
737
|
+
const truncated = text.slice(0, this.config.max);
|
|
738
|
+
const lastPeriod = truncated.lastIndexOf('.');
|
|
739
|
+
|
|
740
|
+
if (lastPeriod > this.config.min * 0.8) {
|
|
741
|
+
text = truncated.slice(0, lastPeriod + 1);
|
|
742
|
+
} else {
|
|
743
|
+
text = truncated.slice(0, this.config.max - 3) + '...';
|
|
744
|
+
}
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
return text;
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
/**
|
|
751
|
+
* Check if text meets all constraints (optimized)
|
|
752
|
+
* @private
|
|
753
|
+
*/
|
|
754
|
+
meetsConstraints(text, tone) {
|
|
755
|
+
// Length check (fast)
|
|
756
|
+
if (text.length < this.config.min || text.length > this.config.max) {
|
|
757
|
+
return false;
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
// Fast recent duplicate check (always enabled for uniqueness)
|
|
761
|
+
if (this.recentCache.has(text)) {
|
|
762
|
+
return false;
|
|
763
|
+
}
|
|
764
|
+
|
|
765
|
+
// Skip expensive sentiment analysis for neutral tone or when mixed sentiment is enabled
|
|
766
|
+
if (tone === 'neu' || this.config.mixedSentiment) {
|
|
767
|
+
return true; // Accept most content for neutral/mixed sentiment
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
// Only do sentiment analysis when strict tone matching is needed
|
|
771
|
+
const score = sentiment.analyze(text).score;
|
|
772
|
+
const tolerance = 2; // Relaxed tolerance for performance
|
|
773
|
+
|
|
774
|
+
if (tone === 'pos' && score < -tolerance) return false;
|
|
775
|
+
if (tone === 'neg' && score > tolerance) return false;
|
|
776
|
+
|
|
777
|
+
return true;
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
/**
|
|
781
|
+
* Fast duplicate detection (much more efficient than SimHash)
|
|
782
|
+
* @private
|
|
783
|
+
*/
|
|
784
|
+
isDuplicate(text) {
|
|
785
|
+
if (!this.config.enableDeduplication) return false;
|
|
786
|
+
|
|
787
|
+
// Quick exact duplicate check
|
|
788
|
+
const hash = fastHash(text);
|
|
789
|
+
if (this.generatedHashes.has(hash)) {
|
|
790
|
+
return true;
|
|
791
|
+
}
|
|
792
|
+
|
|
793
|
+
// Only check similarity against recent texts (not all texts)
|
|
794
|
+
const pattern = getWordPattern(text);
|
|
795
|
+
for (const recentText of this.recentTexts) {
|
|
796
|
+
if (calculateSimilarity(text, recentText) > 0.8) {
|
|
797
|
+
return true;
|
|
798
|
+
}
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
// Add to tracking structures
|
|
802
|
+
this.generatedHashes.add(hash);
|
|
803
|
+
this.recentTexts.push(text);
|
|
804
|
+
|
|
805
|
+
// Keep only recent texts to limit memory and comparison time
|
|
806
|
+
if (this.recentTexts.length > this.maxRecentTexts) {
|
|
807
|
+
this.recentTexts.shift();
|
|
808
|
+
}
|
|
809
|
+
|
|
810
|
+
return false;
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
/**
|
|
814
|
+
* Create text object with metadata (optimized)
|
|
815
|
+
* @private
|
|
816
|
+
*/
|
|
817
|
+
createTextObject(text, tone) {
|
|
818
|
+
const metadata = {
|
|
819
|
+
style: this.config.style,
|
|
820
|
+
intensity: this.config.intensity,
|
|
821
|
+
formality: this.config.formality
|
|
822
|
+
};
|
|
823
|
+
|
|
824
|
+
// Only calculate expensive metrics when really needed
|
|
825
|
+
if (this.config.includeMetadata) {
|
|
826
|
+
// Sentiment analysis is expensive - only do it occasionally or when needed
|
|
827
|
+
if (chance(0.3)) {
|
|
828
|
+
metadata.sentimentScore = sentiment.analyze(text).score;
|
|
829
|
+
}
|
|
830
|
+
|
|
831
|
+
// Add timestamp if enabled
|
|
832
|
+
if (this.config.timestamps && chance(0.3)) {
|
|
833
|
+
const hour = Math.floor(Math.random() * 24);
|
|
834
|
+
const min = Math.floor(Math.random() * 60);
|
|
835
|
+
metadata.timestamp = `${hour}:${min.toString().padStart(2, '0')}`;
|
|
836
|
+
}
|
|
837
|
+
|
|
838
|
+
// Add persona if enabled
|
|
839
|
+
if (this.config.userPersona && chance(0.4)) {
|
|
840
|
+
metadata.persona = this.generatePersona();
|
|
841
|
+
}
|
|
842
|
+
|
|
843
|
+
// Add injected keywords
|
|
844
|
+
const keywords = this.keywordInjector.getInjectedKeywords();
|
|
845
|
+
if (keywords.length > 0) {
|
|
846
|
+
metadata.injectedKeywords = keywords;
|
|
847
|
+
}
|
|
848
|
+
|
|
849
|
+
// Readability calculation is also expensive - only occasionally
|
|
850
|
+
if (chance(0.2)) {
|
|
851
|
+
metadata.readabilityScore = this.calculateReadability(text);
|
|
852
|
+
}
|
|
853
|
+
}
|
|
854
|
+
|
|
855
|
+
return {
|
|
856
|
+
text,
|
|
857
|
+
tone,
|
|
858
|
+
metadata
|
|
859
|
+
};
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
/**
|
|
863
|
+
* Generate user persona
|
|
864
|
+
* @private
|
|
865
|
+
*/
|
|
866
|
+
generatePersona() {
|
|
867
|
+
// Use USER_PERSONAS if available, otherwise fallback
|
|
868
|
+
if (USER_PERSONAS && USER_PERSONAS.length > 0 && chance(0.8)) {
|
|
869
|
+
const persona = USER_PERSONAS[Math.floor(Math.random() * USER_PERSONAS.length)];
|
|
870
|
+
return {
|
|
871
|
+
role: persona.role,
|
|
872
|
+
experience: persona.experience[Math.floor(Math.random() * persona.experience.length)],
|
|
873
|
+
domain: persona.domain[Math.floor(Math.random() * persona.domain.length)],
|
|
874
|
+
speech_pattern: persona.speech_patterns[Math.floor(Math.random() * persona.speech_patterns.length)]
|
|
875
|
+
};
|
|
876
|
+
}
|
|
877
|
+
|
|
878
|
+
// Fallback persona generation
|
|
879
|
+
const roles = ['developer', 'designer', 'manager', 'analyst', 'user', 'admin'];
|
|
880
|
+
const experience = ['junior', 'senior', 'lead', 'expert', 'new'];
|
|
881
|
+
const domains = ['startup', 'enterprise', 'agency', 'nonprofit'];
|
|
882
|
+
|
|
883
|
+
return {
|
|
884
|
+
role: roles[Math.floor(Math.random() * roles.length)],
|
|
885
|
+
experience: experience[Math.floor(Math.random() * experience.length)],
|
|
886
|
+
domain: domains[Math.floor(Math.random() * domains.length)]
|
|
887
|
+
};
|
|
888
|
+
}
|
|
889
|
+
|
|
890
|
+
/**
|
|
891
|
+
* Calculate Flesch Reading Ease score
|
|
892
|
+
* @private
|
|
893
|
+
*/
|
|
894
|
+
calculateReadability(text) {
|
|
895
|
+
const words = text.split(/\s+/).length;
|
|
896
|
+
const sentences = text.split(/[.!?]+/).length;
|
|
897
|
+
const syllables = text.split(/\s+/).reduce((sum, word) =>
|
|
898
|
+
sum + this.countSyllables(word), 0);
|
|
899
|
+
|
|
900
|
+
const score = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words);
|
|
901
|
+
return Math.max(0, Math.min(100, Math.round(score)));
|
|
902
|
+
}
|
|
903
|
+
|
|
904
|
+
/**
|
|
905
|
+
* Count syllables in a word (approximation)
|
|
906
|
+
* @private
|
|
907
|
+
*/
|
|
908
|
+
countSyllables(word) {
|
|
909
|
+
word = word.toLowerCase().replace(/[^a-z]/g, '');
|
|
910
|
+
const vowels = word.match(/[aeiou]/g);
|
|
911
|
+
return vowels ? vowels.length : 1;
|
|
912
|
+
}
|
|
913
|
+
|
|
914
|
+
/**
|
|
915
|
+
* Clean generated text
|
|
916
|
+
* @private
|
|
917
|
+
*/
|
|
918
|
+
cleanText(text) {
|
|
919
|
+
return text
|
|
920
|
+
.replace(/\s+/g, ' ')
|
|
921
|
+
.replace(/\s([,.!?;:])/g, '$1')
|
|
922
|
+
.replace(/\s+([.!?])/g, '$1')
|
|
923
|
+
.trim();
|
|
924
|
+
}
|
|
925
|
+
|
|
926
|
+
/**
|
|
927
|
+
* Drift tone naturally
|
|
928
|
+
* @private
|
|
929
|
+
*/
|
|
930
|
+
driftTone(currentTone) {
|
|
931
|
+
const drifts = {
|
|
932
|
+
'pos': chance(0.7) ? 'pos' : (chance(0.8) ? 'neu' : 'neg'),
|
|
933
|
+
'neg': chance(0.7) ? 'neg' : (chance(0.8) ? 'neu' : 'pos'),
|
|
934
|
+
'neu': chance(0.5) ? 'neu' : (chance(0.5) ? 'pos' : 'neg')
|
|
935
|
+
};
|
|
936
|
+
|
|
937
|
+
return drifts[currentTone] || currentTone;
|
|
938
|
+
}
|
|
939
|
+
|
|
940
|
+
/**
|
|
941
|
+
* Generate multiple text items (alias for generateBatch)
|
|
942
|
+
* @param {TextBatchOptions} options - Batch generation options
|
|
943
|
+
* @returns {(string|GeneratedText|SimpleGeneratedText)[]} Array of generated texts
|
|
944
|
+
*/
|
|
945
|
+
generateMany(options) {
|
|
946
|
+
return this.generateBatch(options);
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
/**
|
|
950
|
+
* Generate one text with randomized semantic options
|
|
951
|
+
* @returns {string|GeneratedText|null} Generated text
|
|
952
|
+
*/
|
|
953
|
+
generateRandom() {
|
|
954
|
+
// Randomize unspecified semantic options for variety
|
|
955
|
+
const randomTone = ['pos', 'neg', 'neu'][Math.floor(Math.random() * 3)];
|
|
956
|
+
const randomStyle = ['support', 'review', 'search', 'feedback', 'chat'][Math.floor(Math.random() * 5)];
|
|
957
|
+
const randomIntensity = ['low', 'medium', 'high'][Math.floor(Math.random() * 3)];
|
|
958
|
+
|
|
959
|
+
// Use random values occasionally
|
|
960
|
+
const tone = chance(0.3) ? randomTone : this.config.tone;
|
|
961
|
+
const tempStyle = chance(0.2) ? randomStyle : this.config.style;
|
|
962
|
+
const tempIntensity = chance(0.2) ? randomIntensity : this.config.intensity;
|
|
963
|
+
|
|
964
|
+
// Temporarily adjust config for this generation
|
|
965
|
+
const originalStyle = this.config.style;
|
|
966
|
+
const originalIntensity = this.config.intensity;
|
|
967
|
+
|
|
968
|
+
this.config.style = tempStyle;
|
|
969
|
+
this.config.intensity = tempIntensity;
|
|
970
|
+
|
|
971
|
+
const result = this.generateOne(tone);
|
|
972
|
+
|
|
973
|
+
// Restore original config
|
|
974
|
+
this.config.style = originalStyle;
|
|
975
|
+
this.config.intensity = originalIntensity;
|
|
976
|
+
|
|
977
|
+
return result;
|
|
978
|
+
}
|
|
979
|
+
|
|
980
|
+
/**
|
|
981
|
+
* Generate multiple text items
|
|
982
|
+
* @param {TextBatchOptions} options - Batch generation options
|
|
983
|
+
* @returns {(string|GeneratedText|SimpleGeneratedText)[]} Array of generated texts
|
|
984
|
+
*/
|
|
985
|
+
generateBatch(options) {
|
|
986
|
+
const {
|
|
987
|
+
n = 10,
|
|
988
|
+
returnType = 'strings',
|
|
989
|
+
tone = this.config.tone,
|
|
990
|
+
related = false,
|
|
991
|
+
sharedContext = null
|
|
992
|
+
} = options;
|
|
993
|
+
|
|
994
|
+
const results = [];
|
|
995
|
+
|
|
996
|
+
// Reset for new batch
|
|
997
|
+
this.currentTone = tone;
|
|
998
|
+
|
|
999
|
+
// Generate shared context if related
|
|
1000
|
+
let context = sharedContext;
|
|
1001
|
+
if (related && !context) {
|
|
1002
|
+
const contexts = ['new feature', 'recent update', 'pricing change', 'UI redesign'];
|
|
1003
|
+
context = contexts[Math.floor(Math.random() * contexts.length)];
|
|
1004
|
+
}
|
|
1005
|
+
|
|
1006
|
+
for (let i = 0; i < n; i++) {
|
|
1007
|
+
let item = this.generateOne(tone);
|
|
1008
|
+
|
|
1009
|
+
if (!item) continue;
|
|
1010
|
+
|
|
1011
|
+
// Add shared context if related
|
|
1012
|
+
if (related && context) {
|
|
1013
|
+
const text = typeof item === 'string' ? item : item.text;
|
|
1014
|
+
const contextualText = this.addSharedContext(text, context);
|
|
1015
|
+
|
|
1016
|
+
if (typeof item === 'string') {
|
|
1017
|
+
item = contextualText;
|
|
1018
|
+
} else {
|
|
1019
|
+
item.text = contextualText;
|
|
1020
|
+
}
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
// Format based on return type
|
|
1024
|
+
if (returnType === 'strings') {
|
|
1025
|
+
results.push(typeof item === 'string' ? item : item.text);
|
|
1026
|
+
} else {
|
|
1027
|
+
results.push(typeof item === 'string' ? { text: item, tone } : item);
|
|
1028
|
+
}
|
|
1029
|
+
}
|
|
1030
|
+
|
|
1031
|
+
return results;
|
|
1032
|
+
}
|
|
1033
|
+
|
|
1034
|
+
/**
|
|
1035
|
+
* Add shared context to text
|
|
1036
|
+
* @private
|
|
1037
|
+
*/
|
|
1038
|
+
addSharedContext(text, context) {
|
|
1039
|
+
// Use discourse markers occasionally for more natural transitions
|
|
1040
|
+
if (chance(0.3) && DISCOURSE_MARKERS) {
|
|
1041
|
+
const markers = DISCOURSE_MARKERS.opinion || ['I think', 'In my opinion', 'From my experience'];
|
|
1042
|
+
const marker = markers[Math.floor(Math.random() * markers.length)];
|
|
1043
|
+
return `${marker}, regarding the ${context}: ${text.toLowerCase()}`;
|
|
1044
|
+
}
|
|
1045
|
+
|
|
1046
|
+
if (chance(0.5)) {
|
|
1047
|
+
return `About the ${context}: ${text}`;
|
|
1048
|
+
} else if (chance(0.5)) {
|
|
1049
|
+
return text.replace(/\. /, `. Regarding the ${context}, `);
|
|
1050
|
+
}
|
|
1051
|
+
return text;
|
|
1052
|
+
}
|
|
1053
|
+
|
|
1054
|
+
/**
|
|
1055
|
+
* Get generator statistics
|
|
1056
|
+
* @returns {Object} Statistics about generation
|
|
1057
|
+
*/
|
|
1058
|
+
getStats() {
|
|
1059
|
+
return {
|
|
1060
|
+
generatedCount: this.generatedHashes.size,
|
|
1061
|
+
currentTone: this.currentTone,
|
|
1062
|
+
config: this.config,
|
|
1063
|
+
cacheSize: this.generatedHashes.size
|
|
1064
|
+
};
|
|
1065
|
+
}
|
|
1066
|
+
|
|
1067
|
+
/**
|
|
1068
|
+
* Reset generator state
|
|
1069
|
+
*/
|
|
1070
|
+
reset() {
|
|
1071
|
+
this.generatedHashes.clear();
|
|
1072
|
+
this.currentTone = this.config.tone;
|
|
1073
|
+
this.keywordInjector = new KeywordInjector(this.config.keywords);
|
|
1074
|
+
}
|
|
1075
|
+
}
|
|
1076
|
+
|
|
1077
|
+
// ============= Public API =============
|
|
1078
|
+
|
|
1079
|
+
/**
 * Factory for a text generator: constructs a `RealisticTextGenerator` from
 * the given configuration.
 *
 * @param {TextGeneratorConfig} config - Configuration options
 * @returns {TextGenerator} Generator instance
 */
export function createGenerator(config = {}) {
    const instance = new RealisticTextGenerator(config);
    return instance;
}
|
|
1087
|
+
|
|
1088
|
+
/**
 * Backwards-compatible alias of the generator factory: constructs a
 * `RealisticTextGenerator` from the given configuration.
 *
 * @param {TextGeneratorConfig} config - Configuration options
 * @returns {TextGenerator} Generator instance
 */
export function buildGenerator(config = {}) {
    const instance = new RealisticTextGenerator(config);
    return instance;
}
|
|
1096
|
+
|
|
1097
|
+
/**
 * One-shot convenience: builds a throwaway generator from `config` and
 * returns a single generated item from it.
 *
 * @param {TextGeneratorConfig} config - Configuration options
 * @returns {string|GeneratedText|null} Generated text
 */
export function generateOne(config = {}) {
    return new RealisticTextGenerator(config).generateOne();
}
|
|
1106
|
+
|
|
1107
|
+
/**
 * Generate multiple text items (convenience function).
 *
 * Splits the argument into batch options (`n`, `returnType`, `tone`,
 * `related`, `sharedContext`) and generator configuration (everything else),
 * builds a throwaway generator from the configuration and runs one batch on
 * it. Calling with no argument uses defaults throughout.
 *
 * @param {TextGeneratorConfig & TextBatchOptions} [options] - Configuration and batch options
 * @returns {(string|GeneratedText|SimpleGeneratedText)[]} Array of generated texts
 */
export function generateBatch(options = {}) {
    // Batch-only keys are peeled off; the remainder configures the generator.
    const { n, returnType, tone, related, sharedContext, ...config } = options;
    const generator = new RealisticTextGenerator(config);
    return generator.generateBatch({ n, returnType, tone, related, sharedContext });
}
|
|
1117
|
+
|
|
1118
|
+
// Export main class as default
|
|
1119
|
+
export default RealisticTextGenerator;
|
|
1120
|
+
|
|
1121
|
+
|