make-mp-data 2.0.23 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/dungeons/ai-chat-analytics-ed.js +274 -0
  2. package/dungeons/business.js +0 -1
  3. package/dungeons/complex.js +0 -1
  4. package/dungeons/experiments.js +0 -1
  5. package/dungeons/gaming.js +47 -14
  6. package/dungeons/media.js +5 -6
  7. package/dungeons/mil.js +296 -0
  8. package/dungeons/money2020-ed-also.js +277 -0
  9. package/dungeons/money2020-ed.js +579 -0
  10. package/dungeons/sanity.js +0 -1
  11. package/dungeons/scd.js +0 -1
  12. package/dungeons/simple.js +57 -18
  13. package/dungeons/student-teacher.js +0 -1
  14. package/dungeons/text-generation.js +706 -0
  15. package/dungeons/userAgent.js +1 -2
  16. package/entry.js +3 -0
  17. package/index.js +8 -36
  18. package/lib/cli/cli.js +0 -7
  19. package/lib/core/config-validator.js +6 -8
  20. package/lib/generators/adspend.js +1 -1
  21. package/lib/generators/events.js +1 -1
  22. package/lib/generators/funnels.js +293 -242
  23. package/lib/generators/text-bak-old.js +1121 -0
  24. package/lib/generators/text.js +1173 -0
  25. package/lib/orchestrators/mixpanel-sender.js +1 -1
  26. package/lib/templates/abbreviated.d.ts +13 -3
  27. package/lib/templates/defaults.js +311 -169
  28. package/lib/templates/hooks-instructions.txt +434 -0
  29. package/lib/templates/phrases-bak.js +925 -0
  30. package/lib/templates/phrases.js +2066 -0
  31. package/lib/templates/{instructions.txt → schema-instructions.txt} +78 -1
  32. package/lib/templates/scratch-dungeon-template.js +1 -1
  33. package/lib/templates/textQuickTest.js +172 -0
  34. package/lib/utils/ai.js +51 -2
  35. package/lib/utils/utils.js +29 -18
  36. package/package.json +7 -5
  37. package/types.d.ts +319 -4
  38. package/lib/utils/chart.js +0 -206
@@ -0,0 +1,1121 @@
1
+ /**
2
+ * Realistic Text Generation Module
3
+ * Generates authentic-feeling unstructured text with natural language patterns
4
+ * @module text
5
+ */
6
+
7
+ /* cSpell:disable */
8
+
9
+ import tracery from 'tracery-grammar';
10
+ import seedrandom from 'seedrandom';
11
+ import crypto from 'crypto';
12
+ import SentimentPkg from 'sentiment';
13
+ import {
14
+ PHRASE_BANK,
15
+ FORMALITY_MODIFIERS,
16
+ INTENSITY_MODIFIERS,
17
+ STYLE_MODIFIERS,
18
+ USER_PERSONAS,
19
+ DISCOURSE_MARKERS,
20
+ TYPO_PATTERNS
21
+ } from '../templates/phrases.js';
22
// Environment label read from the process; falls back to "unknown" when NODE_ENV is unset.
const NODE_ENV = process.env.NODE_ENV === undefined ? "unknown" : process.env.NODE_ENV;

// The imported value is used directly when it is already a constructor function;
// otherwise its `.default` property is used (presumably a CJS/ESM interop shape — confirm).
const Sentiment = typeof SentimentPkg !== 'function' ? SentimentPkg.default : SentimentPkg;

// Single shared analyzer instance used by the generator below.
const sentiment = new Sentiment();
26
+
27
+ /**
28
+ * @typedef {import('../../types.js').TextTone} TextTone
29
+ * @typedef {import('../../types.js').TextStyle} TextStyle
30
+ * @typedef {import('../../types.js').TextIntensity} TextIntensity
31
+ * @typedef {import('../../types.js').TextFormality} TextFormality
32
+ * @typedef {import('../../types.js').TextReturnType} TextReturnType
33
+ * @typedef {import('../../types.js').TextKeywordSet} TextKeywordSet
34
+ * @typedef {import('../../types.js').TextGeneratorConfig} TextGeneratorConfig
35
+ * @typedef {import('../../types.js').TextMetadata} TextMetadata
36
+ * @typedef {import('../../types.js').SimpleGeneratedText} SimpleGeneratedText
37
+ * @typedef {import('../../types.js').GeneratedText} GeneratedText
38
+ * @typedef {import('../../types.js').TextBatchOptions} TextBatchOptions
39
+ * @typedef {import('../../types.js').TextGeneratorStats} TextGeneratorStats
40
+ * @typedef {import('../../types.js').TextGenerator} TextGenerator
41
+ */
42
+
43
+ // ============= Core Utilities =============
44
+
45
/**
 * Compute a signed 32-bit hash of `text` for cheap duplicate detection.
 *
 * The text is lowercased, stripped of everything except a-z, 0-9 and
 * whitespace, whitespace-collapsed and trimmed before hashing, so inputs that
 * differ only in case, punctuation or spacing hash to the same value.
 *
 * @private
 * @param {string} text - Text to hash
 * @returns {number} Signed 32-bit integer hash (0 for an empty normalized text)
 */
function fastHash(text) {
	const canonical = text
		.toLowerCase()
		.replace(/[^a-z0-9\s]/g, '')
		.replace(/\s+/g, ' ')
		.trim();

	let acc = 0;
	let position = 0;
	while (position < canonical.length) {
		// acc * 31 + code, truncated to a 32-bit integer on every step
		acc = (((acc << 5) - acc) + canonical.charCodeAt(position)) | 0;
		position += 1;
	}
	return acc;
}
60
+
61
/**
 * Bounded FIFO cache of recently generated texts.
 *
 * Membership is answered from two sets: the exact texts and their
 * `fastHash` values (so texts differing only in case/punctuation also count
 * as "seen"). When the cache is full the oldest entry is evicted.
 *
 * Reference counts are kept per text and per hash so that evicting one entry
 * does not make the cache forget a text/hash that a newer, still-queued entry
 * shares with it.
 *
 * @private
 */
class RecentTextCache {
	/**
	 * @param {number} [size=100] - Maximum number of texts remembered; a
	 *   non-positive size disables the cache (nothing is stored)
	 */
	constructor(size = 100) {
		this.maxSize = size;
		this.texts = new Set();
		this.hashes = new Set();
		this.queue = [];
		// How many queued entries currently contribute each text / hash
		this.textCounts = new Map();
		this.hashCounts = new Map();
	}

	/**
	 * @param {string} text
	 * @returns {boolean} true when the exact text or its normalized hash is cached
	 */
	has(text) {
		const hash = fastHash(text);
		return this.hashes.has(hash) || this.texts.has(text);
	}

	/**
	 * Remember `text`, evicting the oldest entry first when at capacity.
	 * @param {string} text
	 */
	add(text) {
		// A zero/negative/NaN capacity cannot hold anything; previously this
		// path called shift() on an empty queue and crashed in fastHash(undefined).
		if (!(this.maxSize > 0)) return;

		const hash = fastHash(text);

		if (this.queue.length >= this.maxSize) {
			const oldest = this.queue.shift();
			if (RecentTextCache.release(this.textCounts, oldest)) {
				this.texts.delete(oldest);
			}
			const oldestHash = fastHash(oldest);
			if (RecentTextCache.release(this.hashCounts, oldestHash)) {
				this.hashes.delete(oldestHash);
			}
		}

		this.queue.push(text);
		RecentTextCache.retain(this.textCounts, text);
		this.texts.add(text);
		RecentTextCache.retain(this.hashCounts, hash);
		this.hashes.add(hash);
	}

	/** @returns {number} Number of texts currently queued */
	size() {
		return this.queue.length;
	}

	/**
	 * Increment the reference count for `key`.
	 * @private
	 */
	static retain(counts, key) {
		counts.set(key, (counts.get(key) || 0) + 1);
	}

	/**
	 * Decrement the reference count for `key`.
	 * @private
	 * @returns {boolean} true when no queued entry references `key` any more
	 */
	static release(counts, key) {
		const remaining = (counts.get(key) || 0) - 1;
		if (remaining > 0) {
			counts.set(key, remaining);
			return false;
		}
		counts.delete(key);
		return true;
	}
}
98
+
99
/**
 * Small counter-based pseudo-random generator used to vary generation.
 *
 * Each call to `next()` mixes the base seed with an incrementing counter
 * through a linear congruential step (multiplier 9301, increment 49297,
 * modulus 233280) and returns the result scaled to [0, 1).
 *
 * @private
 */
class DiversityRNG {
	/**
	 * @param {number} [baseSeed] - Numeric seed; defaults to a random integer
	 *   below 1,000,000. A non-finite seed (e.g. NaN from parsing a
	 *   non-numeric string) is replaced by the same random default, because it
	 *   would otherwise make every `next()` return NaN.
	 */
	constructor(baseSeed = Math.floor(Math.random() * 1000000)) {
		this.baseSeed = Number.isFinite(baseSeed)
			? baseSeed
			: Math.floor(Math.random() * 1000000);
		this.counter = 0;
	}

	/**
	 * Produce the next value of the sequence.
	 * @returns {number} A value in [0, 1)
	 */
	next() {
		this.counter++;
		const MODULUS = 233280;

		// Reduce before multiplying: timestamp-sized seeds times 9301 exceed
		// Number.MAX_SAFE_INTEGER and lose low bits. The double remainder keeps
		// negative seeds in [0, MODULUS). The result is congruent to the
		// unreduced formula, so small non-negative integer seeds produce the
		// same sequence as before.
		const base = ((this.baseSeed % MODULUS) + MODULUS) % MODULUS;
		const mixed = (base + ((this.counter * 9301) % MODULUS)) % MODULUS;
		const state = (mixed * 9301 + 49297) % MODULUS;
		return state / MODULUS;
	}

	/**
	 * Run `fn` with `Math.random` temporarily replaced by this generator.
	 * The original `Math.random` is restored even when `fn` throws.
	 * @param {Function} fn - Callback to execute
	 * @returns {*} Whatever `fn` returns
	 */
	withDiversity(fn) {
		const originalRandom = Math.random;
		Math.random = () => this.next();

		try {
			return fn();
		} finally {
			Math.random = originalRandom;
		}
	}
}
128
+
129
/**
 * Build a compact word signature of `text` for similarity checks.
 *
 * The text is lowercased and split into alphanumeric words. Texts of up to
 * three words yield all words joined by `|`; longer texts yield the first
 * three and last three words in the form `a|b|c...x|y|z`.
 *
 * @private
 * @param {string} text
 * @returns {string} Pattern string (empty when the text has no words)
 */
function getWordPattern(text) {
	const tokens = text
		.toLowerCase()
		.replace(/[^a-z0-9\s]/g, ' ')
		.split(/\s+/)
		.filter((token) => token.length > 0);

	if (tokens.length > 3) {
		const head = tokens.slice(0, 3).join('|');
		const tail = tokens.slice(tokens.length - 3).join('|');
		return `${head}...${tail}`;
	}

	return tokens.join('|');
}
146
+
147
/**
 * Jaccard similarity of the lowercase, whitespace-separated token sets of two
 * texts: |intersection| / |union|.
 *
 * @private
 * @param {string} text1
 * @param {string} text2
 * @returns {number} Score between 0 (no shared tokens) and 1 (identical token sets)
 */
function calculateSimilarity(text1, text2) {
	const tokenSet = (value) => new Set(value.toLowerCase().split(/\s+/));
	const left = tokenSet(text1);
	const right = tokenSet(text2);

	let shared = 0;
	left.forEach((token) => {
		if (right.has(token)) shared += 1;
	});

	// |A ∪ B| = |A| + |B| - |A ∩ B|
	const unionSize = left.size + right.size - shared;
	return shared / unionSize;
}
160
+
161
/**
 * Draw once from `Math.random` and report whether the draw falls below
 * `probability`.
 *
 * @private
 * @param {number} probability - Threshold, normally in [0, 1]
 * @returns {boolean} true when the random draw is strictly less than `probability`
 */
function chance(probability) {
	const roll = Math.random();
	return roll < probability;
}
168
+
169
/**
 * Pick one element of `items`, with selection likelihood proportional to the
 * matching entry of `weights`.
 *
 * A single random draw is scaled to the total weight and the weights are
 * subtracted in order until the remainder reaches zero or below. If no item
 * is selected that way, the first item is returned.
 *
 * @private
 * @param {Array} items - Candidates
 * @param {number[]} weights - Weight for each candidate, by index
 * @returns {*} The chosen item (undefined when `items` is empty)
 */
function weightedRandom(items, weights) {
	let totalWeight = 0;
	weights.forEach((weight) => {
		totalWeight += weight;
	});

	let remaining = Math.random() * totalWeight;
	let index = 0;
	while (index < items.length) {
		remaining -= weights[index];
		if (remaining <= 0) {
			return items[index];
		}
		index += 1;
	}

	return items[0];
}
182
+
183
+ // ============= Keyword Injection System =============
184
+
185
/**
 * Inserts configured keywords into generated sentences and records which
 * keywords actually ended up in the text.
 */
class KeywordInjector {
	/**
	 * @param {Object<string, string[]>} [keywords={}] - Keyword lists by category
	 */
	constructor(keywords = {}) {
		this.keywords = keywords;
		this.injectedKeywords = [];
	}

	/**
	 * Inject keywords into some sentences of `text`.
	 *
	 * The text is split on runs of `.`, `!` or `?`; each sentence is
	 * considered with a probability that starts at `density` and shrinks for
	 * later sentences. The list of injected keywords is reset on every call.
	 *
	 * @param {string} text - Text to process
	 * @param {number} [density=0.15] - Base per-sentence injection probability
	 * @returns {string} Text with keywords inserted (unchanged when no keywords are configured)
	 */
	inject(text, density = 0.15) {
		if (!this.keywords || Object.keys(this.keywords).length === 0) return text;

		this.injectedKeywords = [];
		// The capturing group keeps the punctuation: even indexes are
		// sentences, odd indexes are the delimiters that followed them.
		const sentences = text.split(/([.!?]+\s*)/);
		const result = [];

		for (let i = 0; i < sentences.length; i += 2) {
			let sentence = sentences[i];
			const punctuation = sentences[i + 1] || '';

			if (sentence && chance(density * (1 - i * 0.1))) {
				sentence = this.injectIntoSentence(sentence);
			}

			result.push(sentence + punctuation);
		}

		return result.join('');
	}

	/**
	 * Try to place one randomly chosen keyword into `sentence`.
	 *
	 * The keyword is recorded in `injectedKeywords` only when it is actually
	 * inserted; previously it was recorded up front, so the reported list
	 * could contain keywords that never appeared in the text.
	 *
	 * @param {string} sentence
	 * @returns {string} The sentence, with the keyword inserted when a pattern
	 *   matched or the random fallback insertion fired
	 */
	injectIntoSentence(sentence) {
		const categories = Object.keys(this.keywords).filter(cat =>
			this.keywords[cat] && this.keywords[cat].length > 0
		);

		if (categories.length === 0) return sentence;

		const category = categories[Math.floor(Math.random() * categories.length)];
		const keyword = this.keywords[category][
			Math.floor(Math.random() * this.keywords[category].length)
		];

		if (!keyword) return sentence;

		// Replacements are built with functions rather than replacement
		// strings so that a keyword containing "$&", "$1", "$$" etc. is
		// inserted literally instead of being interpreted by String#replace.
		const patterns = [
			{ regex: /\b(the feature|this feature|that feature)\b/i, build: () => `${keyword}` },
			{ regex: /\b(the product|the app|the tool|the system)\b/i, build: () => `${keyword}` },
			{ regex: /\b(especially|particularly|specifically)\b/i, build: (match) => `${match} ${keyword}` },
			{ regex: /\b(broken|working|fast|slow|buggy)\b/i, build: (match) => `${match} (${keyword})` },
			{ regex: /\b(version|release|update)\b/i, build: (match) => `${match} (${keyword})` },
		];

		// Occasionally add style-specific patterns when the style bank is present
		if (chance(0.2) && STYLE_MODIFIERS) {
			if (STYLE_MODIFIERS.support && sentence.includes('error')) {
				patterns.push({ regex: /\berror\b/i, build: () => `${keyword} error` });
			}
			if (STYLE_MODIFIERS.review && sentence.includes('good')) {
				patterns.push({ regex: /\bgood\b/i, build: () => `good (${keyword})` });
			}
		}

		for (const pattern of patterns) {
			if (pattern.regex.test(sentence)) {
				this.injectedKeywords.push(keyword);
				return sentence.replace(pattern.regex, (match) => pattern.build(match));
			}
		}

		// Fallback: splice a short aside in before the last word
		if (chance(0.3)) {
			const insertions = [
				`. Specifically ${keyword}.`,
				` - I mean ${keyword} -`,
				` (talking about ${keyword})`,
				`, especially ${keyword},`
			];
			const insertion = insertions[Math.floor(Math.random() * insertions.length)];
			const insertPoint = sentence.lastIndexOf(' ');
			if (insertPoint > 0) {
				this.injectedKeywords.push(keyword);
				return sentence.slice(0, insertPoint) + insertion + sentence.slice(insertPoint);
			}
		}

		return sentence;
	}

	/**
	 * @returns {string[]} De-duplicated keywords inserted by the most recent `inject` call
	 */
	getInjectedKeywords() {
		return [...new Set(this.injectedKeywords)];
	}
}
278
+
279
+ // ============= Authenticity Enhancement =============
280
+
281
/**
 * Adds human-sounding imperfections (self-corrections, trail-offs, concrete
 * details, personal context, emotional markers, meta prefixes, casual
 * phrasing) to generated text. Each effect fires randomly, scaled by `level`.
 */
class AuthenticityEnhancer {
	/**
	 * @param {number} [level=0.3] - Multiplier applied to every effect's base probability
	 */
	constructor(level = 0.3) {
		this.level = level;
	}

	/**
	 * Run every effect in a fixed order, each gated by its own random draw.
	 * @param {string} text
	 * @param {number} [sentimentScore=0] - Sentiment score used for emotional markers
	 * @param {string} [formality='casual'] - Formality setting used for casual phrasing
	 * @returns {string} Enhanced text
	 */
	enhance(text, sentimentScore = 0, formality = 'casual') {
		let enhanced = text;

		if (chance(this.level * 0.4)) enhanced = this.addSelfCorrection(enhanced);
		if (chance(this.level * 0.3)) enhanced = this.addTrailOff(enhanced);
		if (chance(this.level * 0.3)) enhanced = this.addSpecificDetail(enhanced);
		if (chance(this.level * 0.2)) enhanced = this.addPersonalContext(enhanced);
		if (chance(this.level * 0.2)) enhanced = this.addEmotionalMarker(enhanced, sentimentScore, 'medium');
		if (chance(this.level * 0.1)) enhanced = this.addMetaCommentary(enhanced);
		if (chance(this.level * 0.15)) enhanced = this.addFormalityMarkers(enhanced, formality);

		return enhanced;
	}

	/**
	 * For casual formality, occasionally contract "can not/cannot", "will not"
	 * and "do not", and occasionally swap "going to" for a slang word.
	 * A capitalized match keeps its capital ("Do not" -> "Don't").
	 * @param {string} text
	 * @param {string} formality
	 * @returns {string}
	 */
	addFormalityMarkers(text, formality) {
		if (formality !== 'casual' || !FORMALITY_MODIFIERS.casual) return text;

		const casual = FORMALITY_MODIFIERS.casual;
		let result = text;

		if (chance(0.3) && casual.contractions) {
			const contract = (pattern, contraction) => {
				result = result.replace(pattern, (match) => (
					/^[A-Z]/.test(match)
						? contraction.charAt(0).toUpperCase() + contraction.slice(1)
						: contraction
				));
			};
			contract(/\b(can not|cannot)\b/gi, "can't");
			contract(/\b(will not)\b/gi, "won't");
			contract(/\b(do not)\b/gi, "don't");
		}

		if (chance(0.2) && casual.slang) {
			const slang = casual.slang;
			const word = slang[Math.floor(Math.random() * slang.length)];
			// Replacer function: the slang word is inserted literally even if it contains "$"
			result = result.replace(/\b(going to)\b/i, () => word);
		}

		return result;
	}

	/**
	 * Replace the first occurrence of one randomly chosen word
	 * (good/bad/fast/slow) with a self-correcting phrase, if it is present.
	 * @param {string} text
	 * @returns {string}
	 */
	addSelfCorrection(text) {
		const corrections = [
			{ find: /\bgood\b/, replace: 'good... well, decent' },
			{ find: /\bbad\b/, replace: 'bad... terrible actually' },
			{ find: /\bfast\b/, replace: 'fast (relatively speaking)' },
			{ find: /\bslow\b/, replace: 'slow... painfully slow' },
		];

		const correction = corrections[Math.floor(Math.random() * corrections.length)];
		return text.replace(correction.find, correction.replace);
	}

	/**
	 * Replace a final period with a random trailing-off phrase.
	 * @param {string} text
	 * @returns {string} Unchanged when the text does not end with "."
	 */
	addTrailOff(text) {
		const trails = [
			'...', '... whatever', '... I guess', '... you know?',
			'... if that makes sense', '... or something'
		];

		if (text.endsWith('.')) {
			return text.slice(0, -1) + trails[Math.floor(Math.random() * trails.length)];
		}
		return text;
	}

	/**
	 * Attach a concrete detail to the first matching trigger word
	 * (error, crashes, slow, fast, expensive), checked in that order.
	 * @param {string} text
	 * @returns {string}
	 */
	addSpecificDetail(text) {
		const details = [
			{ find: /\berror\b/i, replace: 'error (404 specifically)' },
			{ find: /\bcrashes\b/i, replace: 'crashes (3-4 times daily)' },
			{ find: /\bslow\b/i, replace: 'slow (30+ seconds)' },
			{ find: /\bfast\b/i, replace: 'fast (under 100ms)' },
			{ find: /\bexpensive\b/i, replace: 'expensive ($299/month)' },
		];

		for (const detail of details) {
			if (detail.find.test(text)) {
				return text.replace(detail.find, detail.replace);
			}
		}
		return text;
	}

	/**
	 * Half of the time, prefix the text with a situational lead-in and
	 * lowercase the original sentence start where that is safe.
	 * @param {string} text
	 * @returns {string}
	 */
	addPersonalContext(text) {
		const contexts = [
			'At my startup, ', 'In our enterprise setup, ', 'During the migration, ',
			'After the latest update, ', 'Since going remote, ', 'In production, '
		];

		if (chance(0.5)) {
			return contexts[Math.floor(Math.random() * contexts.length)] +
				this.lowerSentenceStart(text);
		}
		return text;
	}

	/**
	 * Lowercase the first character unless the text starts with the pronoun
	 * "I" (including "I'm", "I've", ...) or with two capitals/digits in a row
	 * (treated as an acronym such as "API"), which would otherwise be mangled
	 * into "i" / "aPI".
	 * @private
	 * @param {string} text
	 * @returns {string}
	 */
	lowerSentenceStart(text) {
		const startsWithPronounI = /^I\b/.test(text);
		const startsWithAcronym = /^[A-Z][A-Z0-9]/.test(text);
		if (startsWithPronounI || startsWithAcronym) return text;
		return text.charAt(0).toLowerCase() + text.slice(1);
	}

	/**
	 * For strongly polarized text (|score| > 5), append an emotional marker
	 * and occasionally insert an intensity amplifier after is/was/feel(s).
	 * @param {string} text
	 * @param {number} sentimentScore
	 * @param {string} [intensity='medium'] - Key into INTENSITY_MODIFIERS
	 * @returns {string}
	 */
	addEmotionalMarker(text, sentimentScore, intensity = 'medium') {
		if (Math.abs(sentimentScore) > 5) {
			const markers = sentimentScore > 0
				? ['!!!', ' 🎉', ' 💯', ' 🚀']
				: ['...', ' 😤', ' 🤦', ' ugh'];

			let result = text + markers[Math.floor(Math.random() * markers.length)];

			if (chance(0.3) && INTENSITY_MODIFIERS[intensity]) {
				const amplifiers = INTENSITY_MODIFIERS[intensity].amplifiers;
				if (amplifiers) {
					const amplifier = amplifiers[Math.floor(Math.random() * amplifiers.length)];
					// Replacer function: the amplifier is inserted literally even if it contains "$"
					result = result.replace(
						/\b(is|was|feels?)\s+(\w+)/i,
						(match, verb, word) => `${verb} ${amplifier} ${word}`
					);
				}
			}

			return result;
		}
		return text;
	}

	/**
	 * Occasionally prefix the text with a forum-style tag (EDIT:, UPDATE:, ...).
	 * @param {string} text
	 * @returns {string}
	 */
	addMetaCommentary(text) {
		const meta = ['EDIT: ', 'UPDATE: ', 'Note: ', 'BTW: ', 'PS: '];

		if (chance(0.3)) {
			return meta[Math.floor(Math.random() * meta.length)] + text;
		}
		return text;
	}
}
406
+
407
+ // ============= Smart Typo Generator =============
408
+
409
/**
 * Applies typo substitutions to text, choosing a pattern set from the
 * sentiment strength and text style, and clustering typos via a decaying
 * "momentum" factor.
 */
class SmartTypoGenerator {
	/**
	 * Build the three pattern sets (`emotional`, `mobile`, `technical`).
	 * When the imported TYPO_PATTERNS is an array, its `{ pattern, replacements }`
	 * entries are converted to `{ from, to }` and reused (first 3 for mobile,
	 * first 2 for technical); otherwise built-in patterns are used. With a 10%
	 * chance, extra common typos are appended to the emotional set.
	 */
	constructor() {
		const imported = TYPO_PATTERNS;

		if (imported && Array.isArray(imported)) {
			const converted = imported.map((entry) => {
				return { from: entry.pattern, to: entry.replacements };
			});

			this.patterns = {
				emotional: converted,
				mobile: converted.slice(0, 3),
				technical: converted.slice(0, 2)
			};
		} else {
			const emotional = [
				{ from: /\bthe\b/g, to: ['teh', 'hte', 'th'] },
				{ from: /\byou\b/g, to: ['u', 'yuo', 'yu'] },
				{ from: /\band\b/g, to: ['adn', 'an', 'nad'] },
				{ from: /ing\b/g, to: ['ign', 'img', 'ing'] },
				{ from: /tion\b/g, to: ['toin', 'tion', 'iton'] }
			];
			const mobile = [
				{ from: /\s+/g, to: [''] }, // dropped spaces
				{ from: /([a-z])\1/g, to: ['$1'] }, // doubled letter typed once
			];
			const technical = [
				{ from: /\(\)/g, to: ['(', ')'] },
				{ from: /\;/g, to: [':'] },
				{ from: /\=/g, to: ['==', '='] }
			];

			this.patterns = { emotional, mobile, technical };
		}

		if (chance(0.1)) {
			this.addCommonTypos();
		}
	}

	/**
	 * Append three extra word-level typo patterns (their/your/would) to the
	 * emotional set.
	 */
	addCommonTypos() {
		const extras = [
			{ from: /\btheir\b/g, to: ['thier', 'there', 'they\'re'] },
			{ from: /\byour\b/g, to: ['you\'re', 'ur', 'yur'] },
			{ from: /\bwould\b/g, to: ['woudl', 'wolud', 'wuold'] }
		];
		this.patterns.emotional.push(...extras);
	}

	/**
	 * Introduce typos into `text`.
	 *
	 * The text is split into word and whitespace tokens. Each token gets one
	 * random draw against the (sentiment-scaled, momentum-boosted) rate; on a
	 * hit, the first pattern matching the token is applied with a randomly
	 * chosen replacement and momentum is set to 0.5. On a miss momentum
	 * decays by 0.7.
	 *
	 * @param {string} text
	 * @param {number} [rate=0.02] - Base per-token typo probability
	 * @param {string} [style='general'] - 'chat' selects mobile patterns, 'support' technical ones
	 * @param {number} [sentimentScore=0] - |score| > 5 forces the emotional set and raises the rate
	 * @returns {string} Text with typos applied
	 */
	apply(text, rate = 0.02, style = 'general', sentimentScore = 0) {
		const magnitude = Math.abs(sentimentScore);
		const effectiveRate = rate * (1 + (magnitude / 20));

		let setName = 'emotional';
		if (!(magnitude > 5)) {
			if (style === 'chat') {
				setName = 'mobile';
			} else if (style === 'support') {
				setName = 'technical';
			}
		}

		const candidates = this.patterns[setName];
		const tokens = text.split(/(\s+)/);
		let momentum = 0;

		tokens.forEach((token, position) => {
			if (!chance(effectiveRate * (1 + momentum))) {
				momentum *= 0.7;
				return;
			}

			// NOTE: `from` regexes may carry the /g flag; the replace() right
			// after a successful test() leaves lastIndex back at 0.
			const hit = candidates.find((candidate) => candidate.from.test(token));
			if (hit === undefined) return;

			const options = hit.to;
			const replacement = options[Math.floor(Math.random() * options.length)];
			tokens[position] = token.replace(hit.from, replacement);
			momentum = 0.5;
		});

		return tokens.join('');
	}
}
495
+
496
+ // ============= Mixed Sentiment Generator =============
497
+
498
/**
 * Builds multi-clause text that mixes a primary tone with optional
 * contrasting and neutral clauses.
 */
class MixedSentimentGenerator {
	/**
	 * Generate a primary clause and randomly append: a contrasting clause
	 * ("That said, ...", 30%), a neutral clause (20%), and - when at least one
	 * extra clause was added - a closing return to the primary tone
	 * ("Overall though, ...", 50%). Clauses are joined with ". ".
	 *
	 * @param {object} grammar - Grammar exposing `flatten(ruleString)`
	 * @param {string} primaryTone - 'pos', 'neg' or 'neu'
	 * @param {string} [intensity='medium']
	 * @returns {string}
	 */
	generateNuanced(grammar, primaryTone, intensity = 'medium') {
		const parts = [];

		parts.push(this.generateClause(grammar, primaryTone, intensity));

		if (chance(0.3)) {
			const counterTone = primaryTone === 'pos' ? 'neg' :
				primaryTone === 'neg' ? 'pos' : 'neu';
			parts.push(`That said, ${this.generateClause(grammar, counterTone, 'low').toLowerCase()}`);
		}

		if (chance(0.2)) {
			parts.push(this.generateClause(grammar, 'neu', 'low'));
		}

		if (parts.length > 1 && chance(0.5)) {
			parts.push(`Overall though, ${this.generateClause(grammar, primaryTone, intensity).toLowerCase()}`);
		}

		return parts.join('. ');
	}

	/**
	 * Expand the intensity-specific origin rule (`origin_<tone>_<intensity>`),
	 * falling back to the generic `origin_<tone>` rule when the specific one
	 * is unavailable.
	 *
	 * "Unavailable" covers two cases: `flatten` throws, or the output contains
	 * the `((symbol))` placeholder. The placeholder check is needed because
	 * tracery reports an unknown symbol by emitting that placeholder rather
	 * than throwing, which made the original catch-only fallback ineffective.
	 *
	 * @param {object} grammar - Grammar exposing `flatten(ruleString)`
	 * @param {string} tone
	 * @param {string} intensity
	 * @returns {string}
	 */
	generateClause(grammar, tone, intensity) {
		const specificSymbol = `origin_${tone}_${intensity}`;
		const key = `#${specificSymbol}#`;
		const fallback = `#origin_${tone}#`;

		let clause;
		try {
			clause = grammar.flatten(key);
		} catch {
			return grammar.flatten(fallback);
		}

		if (typeof clause === 'string' && clause.includes(`((${specificSymbol}))`)) {
			return grammar.flatten(fallback);
		}
		return clause;
	}
}
536
+
537
+ // ============= Main Generator Class =============
538
+
539
+ /**
540
+ * Text generator with realistic language patterns
541
+ */
542
+ class RealisticTextGenerator {
543
+ /**
544
+ * Create a new generator instance
545
+ * @param {TextGeneratorConfig} config - Generator configuration
546
+ */
547
+ constructor(config = {}) {
548
+ // Apply defaults
549
+ this.config = {
550
+ tone: 'neu',
551
+ style: 'feedback',
552
+ intensity: 'medium',
553
+ formality: 'casual',
554
+ min: 100,
555
+ max: 500,
556
+ keywordDensity: 0.15,
557
+ typos: false,
558
+ typoRate: 0.02,
559
+ mixedSentiment: true,
560
+ authenticityLevel: 0.3,
561
+ timestamps: false,
562
+ userPersona: false,
563
+ sentimentDrift: 0.2,
564
+ includeMetadata: true,
565
+ specificityLevel: 0.5,
566
+ enableDeduplication: true,
567
+ maxAttempts: 50,
568
+ // performanceMode removed - we're always optimized now
569
+ ...config
570
+ };
571
+
572
+ // System is now always optimized for speed + uniqueness
573
+
574
+ // Always optimize for speed but maintain uniqueness
575
+ this.config.maxAttempts = Math.min(this.config.maxAttempts, 25); // Reasonable limit
576
+
577
+ // Initialize RNG if seed provided
578
+ if (this.config.seed) {
579
+ seedrandom(this.config.seed, { global: true });
580
+ }
581
+
582
+ // Initialize components
583
+ this.grammar = this.createGrammar();
584
+ this.keywordInjector = new KeywordInjector(this.config.keywords);
585
+ this.authenticityEnhancer = new AuthenticityEnhancer(this.config.authenticityLevel);
586
+ this.typoGenerator = new SmartTypoGenerator();
587
+ this.mixedSentimentGen = new MixedSentimentGenerator();
588
+
589
+ // Initialize diversity and caching systems
590
+ this.recentCache = new RecentTextCache(100); // Fast recent duplicate detection
591
+ const seedValue = typeof this.config.seed === 'string' ? parseInt(this.config.seed) : (this.config.seed || Date.now());
592
+ this.diversityRNG = new DiversityRNG(seedValue);
593
+
594
+ // Legacy deduplication tracking (kept for compatibility)
595
+ this.generatedHashes = new Set();
596
+ this.recentTexts = [];
597
+ this.maxRecentTexts = 100; // Limit memory usage and comparison time
598
+ this.currentTone = this.config.tone;
599
+ }
600
+
601
+ /**
602
+ * Create Tracery grammar from phrase bank
603
+ * @private
604
+ */
605
+ createGrammar() {
606
+ const g = tracery.createGrammar(PHRASE_BANK);
607
+ g.addModifiers(tracery.baseEngModifiers);
608
+ return g;
609
+ }
610
+
611
+ /**
612
+ * Generate a single text item
613
+ * @param {string} [tone] - Override tone for this generation
614
+ * @returns {GeneratedText|string|null} Generated text or null if failed
615
+ */
616
+ generateOne(tone = this.currentTone) {
617
+ for (let attempt = 0; attempt < this.config.maxAttempts; attempt++) {
618
+ // Allow sentiment drift
619
+ if (this.config.sentimentDrift > 0 && chance(this.config.sentimentDrift)) {
620
+ this.currentTone = this.driftTone(this.currentTone);
621
+ tone = this.currentTone;
622
+ }
623
+
624
+ // Generate base text
625
+ let text = this.generateBaseText(tone);
626
+ if (!text) continue;
627
+
628
+ // Apply enhancements
629
+ text = this.applyEnhancements(text, tone);
630
+
631
+ // Check constraints
632
+ if (!this.meetsConstraints(text, tone)) continue;
633
+
634
+ // Check for duplicates
635
+ if (this.config.enableDeduplication && this.isDuplicate(text)) continue;
636
+
637
+ // Add to recent cache for fast future duplicate detection
638
+ this.recentCache.add(text);
639
+
640
+ // Legacy deduplication tracking handled elsewhere if enabled
641
+
642
+ // Return based on metadata preference
643
+ if (this.config.includeMetadata) {
644
+ return this.createTextObject(text, tone);
645
+ }
646
+
647
+ return text;
648
+ }
649
+
650
+ return null;
651
+ }
652
+
653
+ /**
654
+ * Generate base text from grammar
655
+ * @private
656
+ */
657
+ generateBaseText(tone) {
658
+ // Use diversity RNG for varied generation
659
+ return this.diversityRNG.withDiversity(() => {
660
+ // Use mixed sentiment sometimes
661
+ if (this.config.mixedSentiment && chance(0.3)) {
662
+ return this.mixedSentimentGen.generateNuanced(
663
+ this.grammar,
664
+ tone,
665
+ this.config.intensity
666
+ );
667
+ }
668
+
669
+ // Generate from appropriate origin pattern
670
+ const styleKey = `#origin_${this.config.style}_${tone}#`;
671
+ const fallbackKey = `#origin_${tone}#`;
672
+
673
+ try {
674
+ return this.cleanText(this.grammar.flatten(styleKey));
675
+ } catch {
676
+ return this.cleanText(this.grammar.flatten(fallbackKey));
677
+ }
678
+ });
679
+ }
680
+
681
+ /**
682
+ * Apply all enhancement layers
683
+ * @private
684
+ */
685
+ applyEnhancements(text, tone) {
686
+ const sentScore = sentiment.analyze(text).score;
687
+
688
+ // 1. Keyword injection
689
+ if (this.config.keywords) {
690
+ text = this.keywordInjector.inject(text, this.config.keywordDensity);
691
+ }
692
+
693
+ // 2. Authenticity markers
694
+ text = this.authenticityEnhancer.enhance(text, sentScore, this.config.formality);
695
+
696
+ // 3. Length adjustment
697
+ text = this.adjustLength(text, tone);
698
+
699
+ // 4. Smart typos
700
+ if (this.config.typos) {
701
+ text = this.typoGenerator.apply(
702
+ text,
703
+ this.config.typoRate,
704
+ this.config.style,
705
+ sentScore
706
+ );
707
+ }
708
+
709
+ return text;
710
+ }
711
+
712
+ /**
713
+ * Adjust text to meet length requirements
714
+ * @private
715
+ */
716
+ adjustLength(text, tone) {
717
+ const currentLength = text.length;
718
+
719
+ if (currentLength < this.config.min) {
720
+ // Add more content
721
+ const additions = [
722
+ this.grammar.flatten(`#clause_${tone}#`),
723
+ this.grammar.flatten(`#react_${tone}#`),
724
+ this.grammar.flatten(`#obs_${tone}#`)
725
+ ];
726
+
727
+ while (text.length < this.config.min) {
728
+ const addition = additions[Math.floor(Math.random() * additions.length)];
729
+ text += '. ' + this.cleanText(addition);
730
+
731
+ if (text.length > this.config.min * 1.2) break;
732
+ }
733
+ }
734
+
735
+ if (text.length > this.config.max) {
736
+ // Smart truncation at sentence boundary
737
+ const truncated = text.slice(0, this.config.max);
738
+ const lastPeriod = truncated.lastIndexOf('.');
739
+
740
+ if (lastPeriod > this.config.min * 0.8) {
741
+ text = truncated.slice(0, lastPeriod + 1);
742
+ } else {
743
+ text = truncated.slice(0, this.config.max - 3) + '...';
744
+ }
745
+ }
746
+
747
+ return text;
748
+ }
749
+
750
+ /**
751
+ * Check if text meets all constraints (optimized)
752
+ * @private
753
+ */
754
+ meetsConstraints(text, tone) {
755
+ // Length check (fast)
756
+ if (text.length < this.config.min || text.length > this.config.max) {
757
+ return false;
758
+ }
759
+
760
+ // Fast recent duplicate check (always enabled for uniqueness)
761
+ if (this.recentCache.has(text)) {
762
+ return false;
763
+ }
764
+
765
+ // Skip expensive sentiment analysis for neutral tone or when mixed sentiment is enabled
766
+ if (tone === 'neu' || this.config.mixedSentiment) {
767
+ return true; // Accept most content for neutral/mixed sentiment
768
+ }
769
+
770
+ // Only do sentiment analysis when strict tone matching is needed
771
+ const score = sentiment.analyze(text).score;
772
+ const tolerance = 2; // Relaxed tolerance for performance
773
+
774
+ if (tone === 'pos' && score < -tolerance) return false;
775
+ if (tone === 'neg' && score > tolerance) return false;
776
+
777
+ return true;
778
+ }
779
+
780
+ /**
781
+ * Fast duplicate detection (much more efficient than SimHash)
782
+ * @private
783
+ */
784
+ isDuplicate(text) {
785
+ if (!this.config.enableDeduplication) return false;
786
+
787
+ // Quick exact duplicate check
788
+ const hash = fastHash(text);
789
+ if (this.generatedHashes.has(hash)) {
790
+ return true;
791
+ }
792
+
793
+ // Only check similarity against recent texts (not all texts)
794
+ const pattern = getWordPattern(text);
795
+ for (const recentText of this.recentTexts) {
796
+ if (calculateSimilarity(text, recentText) > 0.8) {
797
+ return true;
798
+ }
799
+ }
800
+
801
+ // Add to tracking structures
802
+ this.generatedHashes.add(hash);
803
+ this.recentTexts.push(text);
804
+
805
+ // Keep only recent texts to limit memory and comparison time
806
+ if (this.recentTexts.length > this.maxRecentTexts) {
807
+ this.recentTexts.shift();
808
+ }
809
+
810
+ return false;
811
+ }
812
+
813
+ /**
814
+ * Create text object with metadata (optimized)
815
+ * @private
816
+ */
817
+ createTextObject(text, tone) {
818
+ const metadata = {
819
+ style: this.config.style,
820
+ intensity: this.config.intensity,
821
+ formality: this.config.formality
822
+ };
823
+
824
+ // Only calculate expensive metrics when really needed
825
+ if (this.config.includeMetadata) {
826
+ // Sentiment analysis is expensive - only do it occasionally or when needed
827
+ if (chance(0.3)) {
828
+ metadata.sentimentScore = sentiment.analyze(text).score;
829
+ }
830
+
831
+ // Add timestamp if enabled
832
+ if (this.config.timestamps && chance(0.3)) {
833
+ const hour = Math.floor(Math.random() * 24);
834
+ const min = Math.floor(Math.random() * 60);
835
+ metadata.timestamp = `${hour}:${min.toString().padStart(2, '0')}`;
836
+ }
837
+
838
+ // Add persona if enabled
839
+ if (this.config.userPersona && chance(0.4)) {
840
+ metadata.persona = this.generatePersona();
841
+ }
842
+
843
+ // Add injected keywords
844
+ const keywords = this.keywordInjector.getInjectedKeywords();
845
+ if (keywords.length > 0) {
846
+ metadata.injectedKeywords = keywords;
847
+ }
848
+
849
+ // Readability calculation is also expensive - only occasionally
850
+ if (chance(0.2)) {
851
+ metadata.readabilityScore = this.calculateReadability(text);
852
+ }
853
+ }
854
+
855
+ return {
856
+ text,
857
+ tone,
858
+ metadata
859
+ };
860
+ }
861
+
862
+ /**
863
+ * Generate user persona
864
+ * @private
865
+ */
866
+ generatePersona() {
867
+ // Use USER_PERSONAS if available, otherwise fallback
868
+ if (USER_PERSONAS && USER_PERSONAS.length > 0 && chance(0.8)) {
869
+ const persona = USER_PERSONAS[Math.floor(Math.random() * USER_PERSONAS.length)];
870
+ return {
871
+ role: persona.role,
872
+ experience: persona.experience[Math.floor(Math.random() * persona.experience.length)],
873
+ domain: persona.domain[Math.floor(Math.random() * persona.domain.length)],
874
+ speech_pattern: persona.speech_patterns[Math.floor(Math.random() * persona.speech_patterns.length)]
875
+ };
876
+ }
877
+
878
+ // Fallback persona generation
879
+ const roles = ['developer', 'designer', 'manager', 'analyst', 'user', 'admin'];
880
+ const experience = ['junior', 'senior', 'lead', 'expert', 'new'];
881
+ const domains = ['startup', 'enterprise', 'agency', 'nonprofit'];
882
+
883
+ return {
884
+ role: roles[Math.floor(Math.random() * roles.length)],
885
+ experience: experience[Math.floor(Math.random() * experience.length)],
886
+ domain: domains[Math.floor(Math.random() * domains.length)]
887
+ };
888
+ }
889
+
890
+ /**
891
+ * Calculate Flesch Reading Ease score
892
+ * @private
893
+ */
894
+ calculateReadability(text) {
895
+ const words = text.split(/\s+/).length;
896
+ const sentences = text.split(/[.!?]+/).length;
897
+ const syllables = text.split(/\s+/).reduce((sum, word) =>
898
+ sum + this.countSyllables(word), 0);
899
+
900
+ const score = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words);
901
+ return Math.max(0, Math.min(100, Math.round(score)));
902
+ }
903
+
904
+ /**
905
+ * Count syllables in a word (approximation)
906
+ * @private
907
+ */
908
+ countSyllables(word) {
909
+ word = word.toLowerCase().replace(/[^a-z]/g, '');
910
+ const vowels = word.match(/[aeiou]/g);
911
+ return vowels ? vowels.length : 1;
912
+ }
913
+
914
+ /**
915
+ * Clean generated text
916
+ * @private
917
+ */
918
+ cleanText(text) {
919
+ return text
920
+ .replace(/\s+/g, ' ')
921
+ .replace(/\s([,.!?;:])/g, '$1')
922
+ .replace(/\s+([.!?])/g, '$1')
923
+ .trim();
924
+ }
925
+
926
+ /**
927
+ * Drift tone naturally
928
+ * @private
929
+ */
930
+ driftTone(currentTone) {
931
+ const drifts = {
932
+ 'pos': chance(0.7) ? 'pos' : (chance(0.8) ? 'neu' : 'neg'),
933
+ 'neg': chance(0.7) ? 'neg' : (chance(0.8) ? 'neu' : 'pos'),
934
+ 'neu': chance(0.5) ? 'neu' : (chance(0.5) ? 'pos' : 'neg')
935
+ };
936
+
937
+ return drifts[currentTone] || currentTone;
938
+ }
939
+
940
+ /**
941
+ * Generate multiple text items (alias for generateBatch)
942
+ * @param {TextBatchOptions} options - Batch generation options
943
+ * @returns {(string|GeneratedText|SimpleGeneratedText)[]} Array of generated texts
944
+ */
945
+ generateMany(options) {
946
+ return this.generateBatch(options);
947
+ }
948
+
949
+ /**
950
+ * Generate one text with randomized semantic options
951
+ * @returns {string|GeneratedText|null} Generated text
952
+ */
953
+ generateRandom() {
954
+ // Randomize unspecified semantic options for variety
955
+ const randomTone = ['pos', 'neg', 'neu'][Math.floor(Math.random() * 3)];
956
+ const randomStyle = ['support', 'review', 'search', 'feedback', 'chat'][Math.floor(Math.random() * 5)];
957
+ const randomIntensity = ['low', 'medium', 'high'][Math.floor(Math.random() * 3)];
958
+
959
+ // Use random values occasionally
960
+ const tone = chance(0.3) ? randomTone : this.config.tone;
961
+ const tempStyle = chance(0.2) ? randomStyle : this.config.style;
962
+ const tempIntensity = chance(0.2) ? randomIntensity : this.config.intensity;
963
+
964
+ // Temporarily adjust config for this generation
965
+ const originalStyle = this.config.style;
966
+ const originalIntensity = this.config.intensity;
967
+
968
+ this.config.style = tempStyle;
969
+ this.config.intensity = tempIntensity;
970
+
971
+ const result = this.generateOne(tone);
972
+
973
+ // Restore original config
974
+ this.config.style = originalStyle;
975
+ this.config.intensity = originalIntensity;
976
+
977
+ return result;
978
+ }
979
+
980
+ /**
981
+ * Generate multiple text items
982
+ * @param {TextBatchOptions} options - Batch generation options
983
+ * @returns {(string|GeneratedText|SimpleGeneratedText)[]} Array of generated texts
984
+ */
985
+ generateBatch(options) {
986
+ const {
987
+ n = 10,
988
+ returnType = 'strings',
989
+ tone = this.config.tone,
990
+ related = false,
991
+ sharedContext = null
992
+ } = options;
993
+
994
+ const results = [];
995
+
996
+ // Reset for new batch
997
+ this.currentTone = tone;
998
+
999
+ // Generate shared context if related
1000
+ let context = sharedContext;
1001
+ if (related && !context) {
1002
+ const contexts = ['new feature', 'recent update', 'pricing change', 'UI redesign'];
1003
+ context = contexts[Math.floor(Math.random() * contexts.length)];
1004
+ }
1005
+
1006
+ for (let i = 0; i < n; i++) {
1007
+ let item = this.generateOne(tone);
1008
+
1009
+ if (!item) continue;
1010
+
1011
+ // Add shared context if related
1012
+ if (related && context) {
1013
+ const text = typeof item === 'string' ? item : item.text;
1014
+ const contextualText = this.addSharedContext(text, context);
1015
+
1016
+ if (typeof item === 'string') {
1017
+ item = contextualText;
1018
+ } else {
1019
+ item.text = contextualText;
1020
+ }
1021
+ }
1022
+
1023
+ // Format based on return type
1024
+ if (returnType === 'strings') {
1025
+ results.push(typeof item === 'string' ? item : item.text);
1026
+ } else {
1027
+ results.push(typeof item === 'string' ? { text: item, tone } : item);
1028
+ }
1029
+ }
1030
+
1031
+ return results;
1032
+ }
1033
+
1034
+ /**
1035
+ * Add shared context to text
1036
+ * @private
1037
+ */
1038
+ addSharedContext(text, context) {
1039
+ // Use discourse markers occasionally for more natural transitions
1040
+ if (chance(0.3) && DISCOURSE_MARKERS) {
1041
+ const markers = DISCOURSE_MARKERS.opinion || ['I think', 'In my opinion', 'From my experience'];
1042
+ const marker = markers[Math.floor(Math.random() * markers.length)];
1043
+ return `${marker}, regarding the ${context}: ${text.toLowerCase()}`;
1044
+ }
1045
+
1046
+ if (chance(0.5)) {
1047
+ return `About the ${context}: ${text}`;
1048
+ } else if (chance(0.5)) {
1049
+ return text.replace(/\. /, `. Regarding the ${context}, `);
1050
+ }
1051
+ return text;
1052
+ }
1053
+
1054
+ /**
1055
+ * Get generator statistics
1056
+ * @returns {Object} Statistics about generation
1057
+ */
1058
+ getStats() {
1059
+ return {
1060
+ generatedCount: this.generatedHashes.size,
1061
+ currentTone: this.currentTone,
1062
+ config: this.config,
1063
+ cacheSize: this.generatedHashes.size
1064
+ };
1065
+ }
1066
+
1067
+ /**
1068
+ * Reset generator state
1069
+ */
1070
+ reset() {
1071
+ this.generatedHashes.clear();
1072
+ this.currentTone = this.config.tone;
1073
+ this.keywordInjector = new KeywordInjector(this.config.keywords);
1074
+ }
1075
+ }
1076
+
1077
+ // ============= Public API =============
1078
+
1079
/**
 * Create a new text generator instance.
 * @param {TextGeneratorConfig} [config={}] - Configuration options, handed to the RealisticTextGenerator constructor
 * @returns {TextGenerator} Generator instance
 */
export function createGenerator(config = {}) {
    const generator = new RealisticTextGenerator(config);
    return generator;
}
1087
+
1088
/**
 * Backwards-compatible alias of createGenerator: builds a new
 * RealisticTextGenerator from the given config.
 * @param {TextGeneratorConfig} [config={}] - Configuration options
 * @returns {TextGenerator} Generator instance
 */
export function buildGenerator(config = {}) {
    const instance = new RealisticTextGenerator(config);
    return instance;
}
1096
+
1097
/**
 * Convenience wrapper: build a throwaway generator from `config` and
 * generate a single text item with it.
 * @param {TextGeneratorConfig} [config={}] - Configuration options
 * @returns {string|GeneratedText|null} Generated text
 */
export function generateOne(config = {}) {
    return new RealisticTextGenerator(config).generateOne();
}
1106
+
1107
/**
 * Generate multiple text items (convenience function).
 *
 * Splits `options` in two: the batch fields (`n`, `returnType`, `tone`,
 * `related`, `sharedContext`) are forwarded to the generator's
 * `generateBatch`, and every remaining field is used as the generator
 * configuration.
 *
 * @param {TextGeneratorConfig & TextBatchOptions} [options={}] - Configuration and batch options
 * @returns {(string|GeneratedText|SimpleGeneratedText)[]} Array of generated texts
 */
export function generateBatch(options = {}) {
    // Defaulting to {} lets callers omit the argument, matching the other
    // convenience functions in this module.
    const { n, returnType, tone, related, sharedContext, ...config } = options;
    const generator = new RealisticTextGenerator(config);
    return generator.generateBatch({ n, returnType, tone, related, sharedContext });
}
1117
+
1118
+ // Export main class as default
1119
+ export default RealisticTextGenerator;
1120
+
1121
+