@realtimex/realtimex-alchemy 1.0.42 → 1.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/CHANGELOG.md +17 -0
- package/dist/api/index.js +120 -0
- package/dist/api/services/AlchemistService.js +9 -5
- package/dist/api/services/ChatService.js +3 -2
- package/dist/api/services/RouterService.js +4 -7
- package/dist/api/services/TransmuteService.js +162 -7
- package/dist/api/utils/DOMSanitizer.js +62 -0
- package/dist/api/utils/contentCleaner.js +71 -383
- package/dist/assets/index-BcolxI8u.css +1 -0
- package/dist/assets/index-DKtbsbuu.js +125 -0
- package/dist/index.html +2 -2
- package/dist/shared/constants.js +23 -0
- package/package.json +4 -4
- package/dist/assets/index-BdYsvKvV.css +0 -1
- package/dist/assets/index-BoqZas2I.js +0 -124
package/dist/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,23 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.0.44] - 2026-01-26
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **Setup Wizard**: Added a UI-based migration tool to streamline the initial setup. Users can now run database migrations directly from the setup interface.
|
|
12
|
+
- **Migration**: Added `POST /api/migrate` endpoint to trigger database migrations from the frontend, streaming real-time logs via SSE.
|
|
13
|
+
|
|
14
|
+
### Improved
|
|
15
|
+
- **Scripts**: Enhanced `migrate.sh` to support non-interactive authentication (via Access Tokens or DB Passwords) and automatic `TOKEN_ENCRYPTION_KEY` generation for Edge Functions.
|
|
16
|
+
|
|
17
|
+
## [1.0.43] - 2026-01-26
|
|
18
|
+
|
|
19
|
+
### Added
|
|
20
|
+
- **Settings**: Added personalized "Blocked Tags" management in the Intelligence Engine settings, allowing users to override default filters.
|
|
21
|
+
|
|
22
|
+
### Improved
|
|
23
|
+
- **Transmute Engine**: Optimized automatic engine creation to only generate newsletter pipelines for active categories (excluding "Other"), significantly reducing noise.
|
|
24
|
+
|
|
8
25
|
## [1.0.42] - 2026-01-26
|
|
9
26
|
|
|
10
27
|
### Added
|
package/dist/api/index.js
CHANGED
|
@@ -2,6 +2,7 @@ import express from 'express';
|
|
|
2
2
|
import cors from 'cors';
|
|
3
3
|
import path from 'path';
|
|
4
4
|
import { fileURLToPath } from 'url';
|
|
5
|
+
import { spawn } from 'child_process';
|
|
5
6
|
import { MinerService } from './services/MinerService.js';
|
|
6
7
|
import { AlchemistService } from './services/AlchemistService.js';
|
|
7
8
|
import { LibrarianService } from './services/LibrarianService.js';
|
|
@@ -27,6 +28,109 @@ const events = EventService.getInstance();
|
|
|
27
28
|
app.get('/health', (req, res) => {
|
|
28
29
|
res.json({ status: 'active', platform: process.platform });
|
|
29
30
|
});
|
|
31
|
+
/**
 * POST /api/migrate — run the `migrate.sh` database migration script and
 * stream its output back to the caller as Server-Sent Events (SSE).
 *
 * Request body: `{ projectId, dbPassword?, accessToken? }`.
 *   - `projectId` is required; a 400 JSON error is returned without it.
 *   - `accessToken` / `dbPassword` are forwarded to the script through the
 *     `SUPABASE_ACCESS_TOKEN` / `SUPABASE_DB_PASSWORD` environment variables.
 *
 * Every SSE message has the form `data: {"type": <type>, "data": <payload>}`
 * where `type` is one of `info`, `stdout`, `stderr`, `error` or `done`.
 * The stream is terminated by exactly one `done` event whose payload is
 * `'success'` or `'failed'`.
 *
 * The migration is deliberately NOT killed when the client disconnects; the
 * child keeps running and its output is simply no longer forwarded.
 *
 * NOTE(review): no authentication check is visible in this handler even though
 * it spawns a process with caller-supplied credentials — confirm that it is
 * protected by middleware registered elsewhere.
 */
app.post('/api/migrate', (req, res) => {
    // Tolerate a missing/unparsed body so the caller gets the 400 below
    // instead of a TypeError from destructuring `undefined`.
    const { projectId, dbPassword, accessToken } = req.body || {};
    if (!projectId) {
        return res.status(400).json({ error: 'Project ID is required' });
    }
    // Set up SSE for streaming output
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');

    // True while the response can still be written to. Flipped off when we
    // end the stream ourselves or when the connection goes away.
    let streamOpen = true;
    const sendEvent = (type, data) => {
        if (!streamOpen || res.writableEnded) {
            return; // client gone or stream already finished: drop the event
        }
        res.write(`data: ${JSON.stringify({ type, data })}\n\n`);
    };
    // Emit the terminal `done` event and end the response exactly once, even
    // if the child reports both 'error' and 'close'.
    const finish = (outcome) => {
        if (!streamOpen || res.writableEnded) {
            return;
        }
        sendEvent('done', outcome);
        streamOpen = false;
        res.end();
    };
    // The response's 'close' event fires both on normal completion and on a
    // premature disconnect; only the latter is worth logging.
    res.on('close', () => {
        if (streamOpen && !res.writableEnded) {
            // Don't kill the process - let it complete
            console.log('[Migrate] Client disconnected, but migration will continue');
        }
        streamOpen = false;
    });

    sendEvent('info', '🚀 Starting migration...');
    // Find the migrate.sh script - check multiple possible locations
    // In dev: api/../scripts/migrate.sh
    // In prod: dist/api/../../scripts/migrate.sh
    const possiblePaths = [
        path.join(__dirname, '..', 'scripts', 'migrate.sh'), // dev mode
        path.join(__dirname, '..', '..', 'scripts', 'migrate.sh'), // compiled dist/api/
        path.join(process.cwd(), 'scripts', 'migrate.sh') // fallback to cwd
    ];
    const scriptPath = possiblePaths.find((p) => fs.existsSync(p));
    if (!scriptPath) {
        sendEvent('error', `Migration script not found. Searched: ${possiblePaths.join(', ')}`);
        return finish('failed');
    }
    // The script lives in <projectRoot>/scripts/, so the root is two levels up.
    const projectRoot = path.dirname(path.dirname(scriptPath));
    sendEvent('info', `Found script at: ${scriptPath}`);
    sendEvent('info', `Working directory: ${projectRoot}`);

    // Prepare environment - support both access token and database password
    const env = {
        ...process.env,
        SUPABASE_PROJECT_ID: projectId,
        // Ensure PATH includes common locations for supabase CLI
        PATH: `${process.env.PATH}:/usr/local/bin:/opt/homebrew/bin:${projectRoot}/node_modules/.bin`
    };
    // Access token is preferred for non-interactive auth
    if (accessToken) {
        env.SUPABASE_ACCESS_TOKEN = accessToken;
        sendEvent('info', 'Using access token for authentication');
    }
    if (dbPassword) {
        env.SUPABASE_DB_PASSWORD = dbPassword;
        sendEvent('info', 'Using database password for authentication');
    }

    // Spawn the migration script in its own process group
    const child = spawn('bash', [scriptPath], {
        env,
        cwd: projectRoot,
        stdio: ['ignore', 'pipe', 'pipe'],
        detached: true // Run in separate process group
    });
    // Don't let the parent wait for this child
    child.unref();
    sendEvent('info', `Process spawned with PID: ${child.pid}`);

    // Forward each non-blank output line as its own SSE message. The pipes
    // keep being drained after a disconnect so the child never blocks.
    const forwardLines = (type) => (chunk) => {
        const lines = chunk.toString().split('\n').filter((l) => l.trim());
        for (const line of lines) {
            sendEvent(type, line);
        }
    };
    child.stdout.on('data', forwardLines('stdout'));
    // Supabase CLI outputs progress to stderr, not always errors
    child.stderr.on('data', forwardLines('stderr'));

    child.on('close', (code, signal) => {
        if (!streamOpen) {
            // Nobody is listening any more; keep the outcome in the server log.
            console.log(`[Migrate] Migration process exited (code=${code}, signal=${signal})`);
        }
        if (code === 0) {
            sendEvent('info', '✅ Migration completed successfully!');
            finish('success');
        }
        else if (signal) {
            sendEvent('error', `Migration killed by signal: ${signal}`);
            finish('failed');
        }
        else {
            sendEvent('error', `Migration failed with exit code ${code}`);
            finish('failed');
        }
    });
    child.on('error', (err) => {
        sendEvent('error', `Failed to start migration: ${err.message}`);
        finish('failed');
    });
});
|
|
30
134
|
// SSE Events
|
|
31
135
|
app.get('/events', (req, res) => {
|
|
32
136
|
res.setHeader('Content-Type', 'text/event-stream');
|
|
@@ -458,6 +562,22 @@ if (fs.existsSync(staticPath)) {
|
|
|
458
562
|
res.status(500).json({ error: error.message || 'Engine brief fetch failed' });
|
|
459
563
|
}
|
|
460
564
|
});
|
|
565
|
+
/**
 * POST /api/engines/ensure-defaults — create any missing default newsletter
 * engines for the caller identified by the `x-user-id` request header.
 *
 * Responds 401 when the header is absent, `{ success: true }` once
 * `transmuteService.ensureDefaultNewsletterEngines` resolves, and 500 with
 * the error message if anything throws.
 */
app.post('/api/engines/ensure-defaults', async (req, res) => {
    try {
        const { 'x-user-id': callerId } = req.headers;
        if (callerId) {
            console.log(`[API] Ensuring default engines for user ${callerId}`);
            const client = getAuthenticatedSupabase(req);
            await transmuteService.ensureDefaultNewsletterEngines(callerId, client);
            res.json({ success: true });
            return;
        }
        // No user id supplied: refuse before touching the database.
        res.status(401).json({ error: 'Unauthorized: Missing User ID' });
    }
    catch (failure) {
        console.error('[API] Failed to ensure default engines:', failure);
        const message = failure.message || 'Failed to ensure default engines';
        res.status(500).json({ error: message });
    }
});
|
|
461
581
|
// Client-side routing fallback (Bypass path-to-regexp error in Express 5)
|
|
462
582
|
app.use((req, res, next) => {
|
|
463
583
|
if (!req.path.startsWith('/api') && !req.path.startsWith('/events')) {
|
|
@@ -4,6 +4,7 @@ import { embeddingService } from './EmbeddingService.js';
|
|
|
4
4
|
import { deduplicationService } from './DeduplicationService.js';
|
|
5
5
|
import { SDKService } from './SDKService.js';
|
|
6
6
|
import { ContentCleaner } from '../utils/contentCleaner.js';
|
|
7
|
+
import { transmuteService } from './TransmuteService.js';
|
|
7
8
|
export class AlchemistService {
|
|
8
9
|
processingEvents;
|
|
9
10
|
router;
|
|
@@ -111,9 +112,8 @@ export class AlchemistService {
|
|
|
111
112
|
let rawContent = result.content;
|
|
112
113
|
finalUrl = result.finalUrl;
|
|
113
114
|
if (rawContent && rawContent.length > 20) {
|
|
114
|
-
// HIGHLIGHT: Payload Hygiene -
|
|
115
|
-
|
|
116
|
-
const cleaned = ContentCleaner.cleanContent(rawContent);
|
|
115
|
+
// HIGHLIGHT: Payload Hygiene - Content is already cleaned by RouterService
|
|
116
|
+
const cleaned = rawContent;
|
|
117
117
|
// Check if this is a login wall or paywall
|
|
118
118
|
isGatedContent = ContentCleaner.isGatedContent(cleaned);
|
|
119
119
|
if (isGatedContent) {
|
|
@@ -158,7 +158,7 @@ export class AlchemistService {
|
|
|
158
158
|
summary: isGatedContent ? 'Login or subscription required to access this content.' : response.summary,
|
|
159
159
|
category: response.category,
|
|
160
160
|
entities: response.entities,
|
|
161
|
-
tags: response.tags,
|
|
161
|
+
tags: (response.tags || []).map(t => t.toLowerCase().trim()),
|
|
162
162
|
content: content,
|
|
163
163
|
// Mark as dismissed if low score OR gated content
|
|
164
164
|
is_dismissed: response.score < 50 || isGatedContent,
|
|
@@ -245,7 +245,11 @@ export class AlchemistService {
|
|
|
245
245
|
},
|
|
246
246
|
userId
|
|
247
247
|
}, supabase);
|
|
248
|
-
// 6. Trigger Background
|
|
248
|
+
// 6. Trigger Background Engine Discovery (NEW: Dynamically create engines after sync)
|
|
249
|
+
transmuteService.ensureDefaultNewsletterEngines(userId, supabase).catch(err => {
|
|
250
|
+
console.error('[AlchemistService] Background engine discovery failed:', err);
|
|
251
|
+
});
|
|
252
|
+
// 7. Trigger Background Persona Consolidation (don't await)
|
|
249
253
|
import('./PersonaService.js').then(({ personaService }) => {
|
|
250
254
|
personaService.consolidatePersona(userId, supabase).catch(err => {
|
|
251
255
|
console.error('[AlchemistService] Background persona update failed:', err);
|
|
@@ -105,12 +105,13 @@ Be concise, helpful, and professional.
|
|
|
105
105
|
...previousMessages.map(m => ({ role: m.role, content: m.content })),
|
|
106
106
|
{ role: 'user', content: finalPrompt } // Current turn with RAG context
|
|
107
107
|
];
|
|
108
|
+
console.log('[ChatService] Final Prompt being sent to LLM:', JSON.stringify(messages, null, 2));
|
|
108
109
|
const response = await sdk.llm.chat(messages, {
|
|
109
110
|
provider: settings.llm_provider || 'realtimexai',
|
|
110
|
-
model: settings.llm_model || 'gpt-4o
|
|
111
|
+
model: settings.llm_model || 'gpt-4o'
|
|
111
112
|
});
|
|
112
113
|
console.log('[ChatService] LLM Response:', JSON.stringify(response, null, 2));
|
|
113
|
-
const aiContent = response.response?.content || "I'm sorry, I couldn't generate a response.";
|
|
114
|
+
const aiContent = response.response?.content || "I'm sorry, I couldn't generate a response. The LLM returned empty content.";
|
|
114
115
|
// 6. Save Assistant Message
|
|
115
116
|
const { data: aiMsg, error: aiError } = await supabase
|
|
116
117
|
.from('chat_messages')
|
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
2
|
import puppeteer from 'puppeteer';
|
|
3
|
-
import TurndownService from 'turndown';
|
|
4
3
|
import { EventService } from './EventService.js';
|
|
5
4
|
import { ContentCleaner } from '../utils/contentCleaner.js';
|
|
6
5
|
export class RouterService {
|
|
7
|
-
turndown = new TurndownService();
|
|
8
6
|
events = EventService.getInstance();
|
|
9
7
|
async extractContent(url) {
|
|
10
8
|
this.events.emit({ type: 'router', message: `Attempting Tier 1 Extraction (Axios): ${url.substring(0, 30)}...` });
|
|
@@ -26,9 +24,8 @@ export class RouterService {
|
|
|
26
24
|
// But usually responseUrl is the reliable one.
|
|
27
25
|
}
|
|
28
26
|
const rawHtml = response.data;
|
|
29
|
-
// Payload Hygiene:
|
|
30
|
-
const
|
|
31
|
-
const markdown = this.turndown.turndown(sanitizedHtml);
|
|
27
|
+
// Payload Hygiene: Full HTML Pipeline (Sanitize -> Markdown -> Polish)
|
|
28
|
+
const markdown = ContentCleaner.cleanContent(rawHtml);
|
|
32
29
|
if (markdown.length > 500) {
|
|
33
30
|
this.events.emit({ type: 'router', message: `Tier 1 Success (${markdown.length} chars) -> ${finalUrl.substring(0, 30)}...` });
|
|
34
31
|
return { content: markdown, finalUrl };
|
|
@@ -47,9 +44,9 @@ export class RouterService {
|
|
|
47
44
|
// Capture final URL from page object
|
|
48
45
|
finalUrl = page.url();
|
|
49
46
|
const content = await page.content();
|
|
50
|
-
|
|
47
|
+
// Payload Hygiene: Full HTML Pipeline (Sanitize -> Markdown -> Polish)
|
|
48
|
+
const markdown = ContentCleaner.cleanContent(content);
|
|
51
49
|
await browser.close();
|
|
52
|
-
const markdown = this.turndown.turndown(sanitizedHtml);
|
|
53
50
|
this.events.emit({ type: 'router', message: `Tier 2 Success (${markdown.length} chars) -> ${finalUrl.substring(0, 30)}...` });
|
|
54
51
|
return { content: markdown, finalUrl };
|
|
55
52
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import path from 'path';
|
|
2
2
|
import os from 'os';
|
|
3
3
|
import { SDKService } from './SDKService.js';
|
|
4
|
-
import {
|
|
4
|
+
import { BLOCKED_TAGS as DEFAULT_BLOCKED_TAGS } from '../../shared/constants.js';
|
|
5
5
|
export class TransmuteService {
|
|
6
6
|
/**
|
|
7
7
|
* Run a specific engine pipeline
|
|
@@ -119,8 +119,8 @@ export class TransmuteService {
|
|
|
119
119
|
summary: s.summary,
|
|
120
120
|
url: bestUrl, // Best Available (Resolved) URL
|
|
121
121
|
source_urls: uniqueUrls, // All associated direct URLs
|
|
122
|
-
//
|
|
123
|
-
content: s.content
|
|
122
|
+
// Return clean/raw content (already cleaned by RouterService)
|
|
123
|
+
content: s.content || undefined
|
|
124
124
|
};
|
|
125
125
|
});
|
|
126
126
|
// 4. Construct high-fidelity System Prompt
|
|
@@ -169,16 +169,31 @@ export class TransmuteService {
|
|
|
169
169
|
* Fetch relevant signals based on engine configuration
|
|
170
170
|
*/
|
|
171
171
|
async fetchContextSignals(userId, config, supabase) {
|
|
172
|
+
const maxSignals = config.max_signals || 10;
|
|
172
173
|
let query = supabase
|
|
173
174
|
.from('signals')
|
|
174
175
|
.select('*')
|
|
175
176
|
.eq('user_id', userId)
|
|
176
177
|
.order('score', { ascending: false })
|
|
177
|
-
.limit(
|
|
178
|
-
|
|
179
|
-
|
|
178
|
+
.limit(maxSignals);
|
|
179
|
+
// Support both single category (legacy) and multi-select categories
|
|
180
|
+
const categories = Array.isArray(config.category) ? config.category : (config.category ? [config.category] : []);
|
|
181
|
+
if (categories.length > 0 && !categories.includes('All')) {
|
|
182
|
+
// Use OR logic for multiple categories
|
|
183
|
+
query = query.in('category', categories);
|
|
180
184
|
}
|
|
181
|
-
|
|
185
|
+
// HIGHLIGHT: Support tag-based filtering (Dynamic Tag Engines)
|
|
186
|
+
if (config.tag) {
|
|
187
|
+
const normalizedTag = config.tag.toLowerCase().trim();
|
|
188
|
+
console.log(`[Transmute] Filtering by tag: "${normalizedTag}" (original: "${config.tag}")`);
|
|
189
|
+
query = query.contains('tags', [normalizedTag]);
|
|
190
|
+
}
|
|
191
|
+
const { data, error } = await query;
|
|
192
|
+
if (error) {
|
|
193
|
+
console.error('[Transmute] Signal query failed:', error);
|
|
194
|
+
return [];
|
|
195
|
+
}
|
|
196
|
+
console.log(`[Transmute] Retrieved ${data?.length || 0} signals for user ${userId}`);
|
|
182
197
|
return (data || []);
|
|
183
198
|
}
|
|
184
199
|
/**
|
|
@@ -222,6 +237,146 @@ export class TransmuteService {
|
|
|
222
237
|
});
|
|
223
238
|
return response.response?.content || "Failed to generate content.";
|
|
224
239
|
}
|
|
240
|
+
/**
|
|
241
|
+
* Ensure default newsletter engines exist for each category
|
|
242
|
+
*/
|
|
243
|
+
/**
|
|
244
|
+
* Ensure default newsletter engines exist for each category
|
|
245
|
+
* This is "Self-Healing": it creates missing engines based on discovered signals.
|
|
246
|
+
*/
|
|
247
|
+
async ensureDefaultNewsletterEngines(userId, supabase) {
|
|
248
|
+
console.log(`[Transmute] Running Self-Healing Engine Discovery for user ${userId}...`);
|
|
249
|
+
// 1. Fetch Active Categories from Signals
|
|
250
|
+
// We only create engines for categories that actually have data.
|
|
251
|
+
const { data: signalStats } = await supabase
|
|
252
|
+
.from('signals')
|
|
253
|
+
.select('category')
|
|
254
|
+
.eq('user_id', userId);
|
|
255
|
+
const activeCategories = new Set(signalStats?.map(s => s.category).filter(Boolean) || []);
|
|
256
|
+
// 2. Fetch Existing Pipelines to avoid duplicates
|
|
257
|
+
const { data: existingEngines } = await supabase
|
|
258
|
+
.from('engines')
|
|
259
|
+
.select('title, config')
|
|
260
|
+
.eq('user_id', userId)
|
|
261
|
+
.eq('type', 'newsletter');
|
|
262
|
+
const existingTitles = new Set(existingEngines?.map(e => e.title) || []);
|
|
263
|
+
const existingCategories = new Set(existingEngines?.map(e => e.config?.category).filter(Boolean));
|
|
264
|
+
const existingTags = new Set(existingEngines?.map(e => e.config?.tag?.toLowerCase().trim()).filter(Boolean));
|
|
265
|
+
// 3. Create Engines for Active Categories (Exclude 'Other')
|
|
266
|
+
for (const category of activeCategories) {
|
|
267
|
+
// Filter out 'Other' (case-insensitive) and empty strings
|
|
268
|
+
if (!category || category.toLowerCase() === 'other')
|
|
269
|
+
continue;
|
|
270
|
+
// STRICT 1:1 CHECK: Skip if title exists OR category is already covered
|
|
271
|
+
const title = `${category} Daily`;
|
|
272
|
+
if (existingTitles.has(title) || existingCategories.has(category)) {
|
|
273
|
+
continue;
|
|
274
|
+
}
|
|
275
|
+
console.log(`[Transmute] Bootstrapping missing category engine: ${title}`);
|
|
276
|
+
const config = {
|
|
277
|
+
category,
|
|
278
|
+
execution_mode: 'desktop',
|
|
279
|
+
schedule: 'Daily',
|
|
280
|
+
llm_provider: 'realtimexai',
|
|
281
|
+
llm_model: 'gpt-4o',
|
|
282
|
+
max_signals: 30,
|
|
283
|
+
custom_prompt: `Create a comprehensive daily newsletter focused on ${category}. Highlight the most important developments, key insights, and actionable takeaways. Use a professional, insight-driven tone with clear structure: start with 'The Big Story' followed by 'Quick Hits' for other notable items.`
|
|
284
|
+
};
|
|
285
|
+
await supabase
|
|
286
|
+
.from('engines')
|
|
287
|
+
.insert({
|
|
288
|
+
user_id: userId,
|
|
289
|
+
title: title,
|
|
290
|
+
type: 'newsletter',
|
|
291
|
+
config: config,
|
|
292
|
+
status: 'active'
|
|
293
|
+
});
|
|
294
|
+
}
|
|
295
|
+
// 4. Dynamic Tag-Based Categories
|
|
296
|
+
// De-prioritized for now to reduce noise.
|
|
297
|
+
// Only run if the threshold is very high or if category mapping isn't enough.
|
|
298
|
+
// await this.ensureDynamicTagEngines(userId, supabase, existingTitles, existingTags);
|
|
299
|
+
}
|
|
300
|
+
/**
|
|
301
|
+
* Find popular tags and create engines for them
|
|
302
|
+
* Treat popular tags as "dynamic categories" with 1:1 mapping.
|
|
303
|
+
*/
|
|
304
|
+
async ensureDynamicTagEngines(userId, supabase, existingTitles, existingTags) {
|
|
305
|
+
// 1. Fetch all tags for the user
|
|
306
|
+
const { data: signals } = await supabase
|
|
307
|
+
.from('signals')
|
|
308
|
+
.select('tags')
|
|
309
|
+
.eq('user_id', userId);
|
|
310
|
+
if (!signals || signals.length === 0)
|
|
311
|
+
return;
|
|
312
|
+
// 2. Fetch User Settings (for blocked tags)
|
|
313
|
+
const { data: settings } = await supabase
|
|
314
|
+
.from('alchemy_settings')
|
|
315
|
+
.select('blocked_tags')
|
|
316
|
+
.eq('user_id', userId)
|
|
317
|
+
.maybeSingle();
|
|
318
|
+
const userBlockedTags = new Set((settings?.blocked_tags || []).map(t => t.toLowerCase().trim()));
|
|
319
|
+
// Use user blocked tags if they exist, otherwise fallback to system defaults
|
|
320
|
+
const systemDefaults = Array.from(DEFAULT_BLOCKED_TAGS).map(t => t.toLowerCase().trim());
|
|
321
|
+
const BLOCKED_TAGS = userBlockedTags.size > 0 ? userBlockedTags : new Set(systemDefaults);
|
|
322
|
+
// Core category terms to skip (already handled by category discovery)
|
|
323
|
+
const CORE_CATEGORY_TERMS = new Set([
|
|
324
|
+
'ai', 'ml', 'machine learning', 'artificial intelligence',
|
|
325
|
+
'business', 'technology', 'tech', 'finance', 'financial',
|
|
326
|
+
'crypto', 'cryptocurrency', 'bitcoin', 'science', 'scientific',
|
|
327
|
+
'politics', 'political', 'government'
|
|
328
|
+
]);
|
|
329
|
+
const tagCounts = new Map();
|
|
330
|
+
const DYNAMIC_THRESHOLD = 50; // Increased significantly to reduce noise for now
|
|
331
|
+
const MAX_NEW_PER_RUN = 3;
|
|
332
|
+
signals.forEach(s => {
|
|
333
|
+
const tags = (s.tags || []);
|
|
334
|
+
tags.forEach(tag => {
|
|
335
|
+
const lower = tag.toLowerCase().trim();
|
|
336
|
+
// Filter out: empty, short, blocked, or core category overlaps
|
|
337
|
+
// Added check for 'redirect' and other junk specifically
|
|
338
|
+
if (!lower || lower.length < 3 || BLOCKED_TAGS.has(lower) || CORE_CATEGORY_TERMS.has(lower) || lower.includes('redirect')) {
|
|
339
|
+
return;
|
|
340
|
+
}
|
|
341
|
+
tagCounts.set(lower, (tagCounts.get(lower) || 0) + 1);
|
|
342
|
+
});
|
|
343
|
+
});
|
|
344
|
+
// 3. Identify and Sort Candidates
|
|
345
|
+
const candidates = Array.from(tagCounts.entries())
|
|
346
|
+
.filter(([_, count]) => count >= DYNAMIC_THRESHOLD)
|
|
347
|
+
.sort((a, b) => b[1] - a[1]);
|
|
348
|
+
let createdCount = 0;
|
|
349
|
+
for (const [tag, count] of candidates) {
|
|
350
|
+
const displayName = tag.split(' ').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' ');
|
|
351
|
+
const title = `${displayName} Daily`;
|
|
352
|
+
// STRICT 1:1 CHECK: Skip if title exists OR tag is already covered
|
|
353
|
+
if (existingTitles.has(title) || existingTags.has(tag)) {
|
|
354
|
+
continue;
|
|
355
|
+
}
|
|
356
|
+
console.log(`[Transmute] Creating dynamic tag engine for "${tag}" (${count} signals)`);
|
|
357
|
+
const config = {
|
|
358
|
+
tag,
|
|
359
|
+
execution_mode: 'desktop',
|
|
360
|
+
schedule: 'Daily',
|
|
361
|
+
llm_provider: 'realtimexai',
|
|
362
|
+
llm_model: 'gpt-4o'
|
|
363
|
+
};
|
|
364
|
+
const { error } = await supabase
|
|
365
|
+
.from('engines')
|
|
366
|
+
.insert({
|
|
367
|
+
user_id: userId,
|
|
368
|
+
title: title,
|
|
369
|
+
type: 'newsletter',
|
|
370
|
+
config: config,
|
|
371
|
+
status: 'active'
|
|
372
|
+
});
|
|
373
|
+
if (!error)
|
|
374
|
+
createdCount++;
|
|
375
|
+
}
|
|
376
|
+
if (createdCount > 0) {
|
|
377
|
+
console.log(`[Transmute] Successfully auto-created ${createdCount} dynamic tag engines.`);
|
|
378
|
+
}
|
|
379
|
+
}
|
|
225
380
|
/**
|
|
226
381
|
* Save the generated asset to DB
|
|
227
382
|
*/
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { JSDOM } from 'jsdom';
|
|
2
|
+
import { Readability } from '@mozilla/readability';
|
|
3
|
+
export class DOMSanitizer {
    /**
     * Sanitize HTML using proper DOM parsing (not regex).
     *
     * Pipeline:
     *   1. Parse the markup with JSDOM.
     *   2. Remove script/style/embed/head-level elements outright.
     *   3. Ask Readability for the main article and return its HTML.
     *   4. If Readability finds nothing, strip common UI chrome (nav, footer,
     *      ads, ...) and return what is left of <body>.
     *
     * If DOM parsing itself throws, the input is returned with only
     * <script>/<style> elements removed by regex.
     *
     * @param {string} html - Raw HTML markup.
     * @returns {string} Cleaned HTML string (not Markdown, not plain text);
     *   an empty string for empty input.
     */
    static sanitizeHtml(html) {
        if (!html) {
            return "";
        }
        let dom;
        try {
            // 1. Parse DOM
            dom = new JSDOM(html, { url: "https://example.com" });
            const doc = dom.window.document;

            // 2. Pre-clean: Remove Toxic Tags immediately
            // We do this BEFORE Readability to ensure no scripts sneak in
            const toxicSelectors = [
                'script', 'style', 'noscript', 'svg', 'iframe', 'embed', 'object',
                'meta', 'link', 'head' // We only want body content
            ];
            let removedCount = 0;
            for (const tag of toxicSelectors) {
                const elements = doc.querySelectorAll(tag);
                if (elements.length > 0) {
                    console.log(`[DOMSanitizer] Removing ${elements.length} <${tag}> tags`);
                    removedCount += elements.length;
                    elements.forEach((el) => el.remove());
                }
            }
            console.log(`[DOMSanitizer] Removed ${removedCount} toxic elements total.`);

            // 3. Try Readability (Best for Articles)
            // Readability.parse() mutates the document it is given, so hand it
            // a clone: the fallback below must see the pre-cleaned DOM intact.
            const article = new Readability(doc.cloneNode(true)).parse();
            if (article && article.content) {
                // Return Clean HTML with structure preserved (not textContent)
                return article.content;
            }

            // 4. Fallback: Manual Cleaning (If Readability fails)
            // Remove UI Noise
            const noiseSelectors = [
                'header', 'footer', 'nav', 'aside', 'form',
                '[role="alert"]', '[role="banner"]', '[role="dialog"]',
                '.ad', '.ads', '.advertisement', '.social-share', '#cookie-banner'
            ];
            for (const selector of noiseSelectors) {
                try {
                    doc.querySelectorAll(selector).forEach((el) => el.remove());
                }
                catch (e) {
                    // Ignore selector errors
                }
            }
            // Return whatever is left in the body as HTML
            return doc.body.innerHTML;
        }
        catch (error) {
            console.error('[DOMSanitizer] Parsing failed, falling back to regex strip', error);
            // Emergency fallback: strip script/style elements by regex and
            // return the remaining markup (still HTML, not plain text).
            return html.replace(/<(script|style)[^>]*>[\s\S]*?<\/\1>/gi, '');
        }
        finally {
            // Release the JSDOM window so repeated calls in a long-running
            // process do not accumulate parsed documents.
            if (dom) {
                dom.window.close();
            }
        }
    }
}
|