npm - @realtimex/realtimex-alchemy - Versions diffs - 1.0.42 → 1.0.44 - Mend

@realtimex/realtimex-alchemy 1.0.42 → 1.0.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/dist/CHANGELOG.md +17 -0
package/dist/api/index.js +120 -0
package/dist/api/services/AlchemistService.js +9 -5
package/dist/api/services/ChatService.js +3 -2
package/dist/api/services/RouterService.js +4 -7
package/dist/api/services/TransmuteService.js +162 -7
package/dist/api/utils/DOMSanitizer.js +62 -0
package/dist/api/utils/contentCleaner.js +71 -383
package/dist/assets/index-BcolxI8u.css +1 -0
package/dist/assets/index-DKtbsbuu.js +125 -0
package/dist/index.html +2 -2
package/dist/shared/constants.js +23 -0
package/package.json +4 -4
package/dist/assets/index-BdYsvKvV.css +0 -1
package/dist/assets/index-BoqZas2I.js +0 -124

package/dist/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,23 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [1.0.44] - 2026-01-26
+### Added
+- **Setup Wizard**: Added a UI-based migration tool to streamline the initial setup. Users can now run database migrations directly from the setup interface.
+- **Migration**: Added `POST /api/migrate` endpoint to trigger database migrations from the frontend, streaming real-time logs via SSE.
+### Improved
+- **Scripts**: Enhanced `migrate.sh` to support non-interactive authentication (via Access Tokens or DB Passwords) and automatic `TOKEN_ENCRYPTION_KEY` generation for Edge Functions.
+## [1.0.43] - 2026-01-26
+### Added
+- **Settings**: Added personalized "Blocked Tags" management in the Intelligence Engine settings, allowing users to override default filters.
+### Improved
+- **Transmute Engine**: Optimized automatic engine creation to only generate newsletter pipelines for active categories (excluding "Other"), significantly reducing noise.
 ## [1.0.42] - 2026-01-26
 ### Added

package/dist/api/index.js CHANGED Viewed

@@ -2,6 +2,7 @@ import express from 'express';
 import cors from 'cors';
 import path from 'path';
 import { fileURLToPath } from 'url';
+import { spawn } from 'child_process';
 import { MinerService } from './services/MinerService.js';
 import { AlchemistService } from './services/AlchemistService.js';
 import { LibrarianService } from './services/LibrarianService.js';
@@ -27,6 +28,109 @@ const events = EventService.getInstance();
 app.get('/health', (req, res) => {
     res.json({ status: 'active', platform: process.platform });
 });
+// Run database migrations (SSE stream)
+app.post('/api/migrate', (req, res) => {
+    const { projectId, dbPassword, accessToken } = req.body;
+    if (!projectId) {
+        return res.status(400).json({ error: 'Project ID is required' });
+    }
+    // Set up SSE for streaming output
+    res.setHeader('Content-Type', 'text/event-stream');
+    res.setHeader('Cache-Control', 'no-cache');
+    res.setHeader('Connection', 'keep-alive');
+    const sendEvent = (type, data) => {
+        res.write(`data: ${JSON.stringify({ type, data })}\n\n`);
+    };
+    sendEvent('info', '🚀 Starting migration...');
+    // Find the migrate.sh script - check multiple possible locations
+    // In dev: api/../scripts/migrate.sh
+    // In prod: dist/api/../../scripts/migrate.sh
+    const possiblePaths = [
+        path.join(__dirname, '..', 'scripts', 'migrate.sh'), // dev mode
+        path.join(__dirname, '..', '..', 'scripts', 'migrate.sh'), // compiled dist/api/
+        path.join(process.cwd(), 'scripts', 'migrate.sh') // fallback to cwd
+    ];
+    const scriptPath = possiblePaths.find(p => fs.existsSync(p));
+    const projectRoot = scriptPath ? path.dirname(path.dirname(scriptPath)) : process.cwd();
+    if (!scriptPath) {
+        sendEvent('error', `Migration script not found. Searched: ${possiblePaths.join(', ')}`);
+        sendEvent('done', 'failed');
+        return res.end();
+    }
+    sendEvent('info', `Found script at: ${scriptPath}`);
+    sendEvent('info', `Working directory: ${projectRoot}`);
+    // Prepare environment - support both access token and database password
+    const env = {
+        ...process.env,
+        SUPABASE_PROJECT_ID: projectId,
+        // Ensure PATH includes common locations for supabase CLI
+        PATH: `${process.env.PATH}:/usr/local/bin:/opt/homebrew/bin:${projectRoot}/node_modules/.bin`
+    };
+    // Access token is preferred for non-interactive auth
+    if (accessToken) {
+        env.SUPABASE_ACCESS_TOKEN = accessToken;
+        sendEvent('info', 'Using access token for authentication');
+    }
+    if (dbPassword) {
+        env.SUPABASE_DB_PASSWORD = dbPassword;
+        sendEvent('info', 'Using database password for authentication');
+    }
+    // Track process state
+    let processCompleted = false;
+    // Spawn the migration script in its own process group
+    const child = spawn('bash', [scriptPath], {
+        env,
+        cwd: projectRoot,
+        stdio: ['ignore', 'pipe', 'pipe'],
+        detached: true // Run in separate process group
+    });
+    // Don't let the parent wait for this child
+    child.unref();
+    sendEvent('info', `Process spawned with PID: ${child.pid}`);
+    child.stdout.on('data', (data) => {
+        const lines = data.toString().split('\n').filter((l) => l.trim());
+        for (const line of lines) {
+            sendEvent('stdout', line);
+        }
+    });
+    child.stderr.on('data', (data) => {
+        const lines = data.toString().split('\n').filter((l) => l.trim());
+        for (const line of lines) {
+            // Supabase CLI outputs progress to stderr, not always errors
+            sendEvent('stderr', line);
+        }
+    });
+    child.on('close', (code, signal) => {
+        processCompleted = true;
+        if (code === 0) {
+            sendEvent('info', '✅ Migration completed successfully!');
+            sendEvent('done', 'success');
+        }
+        else if (signal) {
+            sendEvent('error', `Migration killed by signal: ${signal}`);
+            sendEvent('done', 'failed');
+        }
+        else {
+            sendEvent('error', `Migration failed with exit code ${code}`);
+            sendEvent('done', 'failed');
+        }
+        res.end();
+    });
+    child.on('error', (err) => {
+        processCompleted = true;
+        sendEvent('error', `Failed to start migration: ${err.message}`);
+        sendEvent('done', 'failed');
+        res.end();
+    });
+    // Don't kill the process on client disconnect - let migration complete
+    // The process should finish on its own, and failed writes are handled gracefully
+    let clientConnected = true;
+    req.on('close', () => {
+        clientConnected = false;
+        // Don't kill the process - let it complete
+        console.log('[Migrate] Client disconnected, but migration will continue');
+    });
+});
 // SSE Events
 app.get('/events', (req, res) => {
     res.setHeader('Content-Type', 'text/event-stream');
@@ -458,6 +562,22 @@ if (fs.existsSync(staticPath)) {
             res.status(500).json({ error: error.message || 'Engine brief fetch failed' });
         }
     });
+    app.post('/api/engines/ensure-defaults', async (req, res) => {
+        try {
+            const userId = req.headers['x-user-id'];
+            if (!userId) {
+                return res.status(401).json({ error: 'Unauthorized: Missing User ID' });
+            }
+            console.log(`[API] Ensuring default engines for user ${userId}`);
+            const supabase = getAuthenticatedSupabase(req);
+            await transmuteService.ensureDefaultNewsletterEngines(userId, supabase);
+            res.json({ success: true });
+        }
+        catch (error) {
+            console.error('[API] Failed to ensure default engines:', error);
+            res.status(500).json({ error: error.message || 'Failed to ensure default engines' });
+        }
+    });
     // Client-side routing fallback (Bypass path-to-regexp error in Express 5)
     app.use((req, res, next) => {
         if (!req.path.startsWith('/api') && !req.path.startsWith('/events')) {

package/dist/api/services/AlchemistService.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { embeddingService } from './EmbeddingService.js';
 import { deduplicationService } from './DeduplicationService.js';
 import { SDKService } from './SDKService.js';
 import { ContentCleaner } from '../utils/contentCleaner.js';
+import { transmuteService } from './TransmuteService.js';
 export class AlchemistService {
     processingEvents;
     router;
@@ -111,9 +112,8 @@ export class AlchemistService {
                     let rawContent = result.content;
                     finalUrl = result.finalUrl;
                     if (rawContent && rawContent.length > 20) {
-                        // HIGHLIGHT: Payload Hygiene - Clean Markdown content after conversion
-                        // This strips JS/CSS patterns that survived Turndown
-                        const cleaned = ContentCleaner.cleanContent(rawContent);
+                        // HIGHLIGHT: Payload Hygiene - Content is already cleaned by RouterService
+                        const cleaned = rawContent;
                         // Check if this is a login wall or paywall
                         isGatedContent = ContentCleaner.isGatedContent(cleaned);
                         if (isGatedContent) {
@@ -158,7 +158,7 @@ export class AlchemistService {
                         summary: isGatedContent ? 'Login or subscription required to access this content.' : response.summary,
                         category: response.category,
                         entities: response.entities,
-                        tags: response.tags,
+                        tags: (response.tags || []).map(t => t.toLowerCase().trim()),
                         content: content,
                         // Mark as dismissed if low score OR gated content
                         is_dismissed: response.score < 50 || isGatedContent,
@@ -245,7 +245,11 @@ export class AlchemistService {
             },
             userId
         }, supabase);
-        // 6. Trigger Background Persona Consolidation (don't await)
+        // 6. Trigger Background Engine Discovery (NEW: Dynamically create engines after sync)
+        transmuteService.ensureDefaultNewsletterEngines(userId, supabase).catch(err => {
+            console.error('[AlchemistService] Background engine discovery failed:', err);
+        });
+        // 7. Trigger Background Persona Consolidation (don't await)
         import('./PersonaService.js').then(({ personaService }) => {
             personaService.consolidatePersona(userId, supabase).catch(err => {
                 console.error('[AlchemistService] Background persona update failed:', err);

package/dist/api/services/ChatService.js CHANGED Viewed

@@ -105,12 +105,13 @@ Be concise, helpful, and professional.
                 ...previousMessages.map(m => ({ role: m.role, content: m.content })),
                 { role: 'user', content: finalPrompt } // Current turn with RAG context
             ];
+            console.log('[ChatService] Final Prompt being sent to LLM:', JSON.stringify(messages, null, 2));
             const response = await sdk.llm.chat(messages, {
                 provider: settings.llm_provider || 'realtimexai',
-                model: settings.llm_model || 'gpt-4o-mini' // Default to available model
+                model: settings.llm_model || 'gpt-4o'
             });
             console.log('[ChatService] LLM Response:', JSON.stringify(response, null, 2));
-            const aiContent = response.response?.content || "I'm sorry, I couldn't generate a response.";
+            const aiContent = response.response?.content || "I'm sorry, I couldn't generate a response. The LLM returned empty content.";
             // 6. Save Assistant Message
             const { data: aiMsg, error: aiError } = await supabase
                 .from('chat_messages')

package/dist/api/services/RouterService.js CHANGED Viewed

@@ -1,10 +1,8 @@
 import axios from 'axios';
 import puppeteer from 'puppeteer';
-import TurndownService from 'turndown';
 import { EventService } from './EventService.js';
 import { ContentCleaner } from '../utils/contentCleaner.js';
 export class RouterService {
-    turndown = new TurndownService();
     events = EventService.getInstance();
     async extractContent(url) {
         this.events.emit({ type: 'router', message: `Attempting Tier 1 Extraction (Axios): ${url.substring(0, 30)}...` });
@@ -26,9 +24,8 @@ export class RouterService {
                 // But usually responseUrl is the reliable one.
             }
             const rawHtml = response.data;
-            // Payload Hygiene: Sanitize HTML before Markdown conversion
-            const sanitizedHtml = ContentCleaner.sanitizeHtml(rawHtml);
-            const markdown = this.turndown.turndown(sanitizedHtml);
+            // Payload Hygiene: Full HTML Pipeline (Sanitize -> Markdown -> Polish)
+            const markdown = ContentCleaner.cleanContent(rawHtml);
             if (markdown.length > 500) {
                 this.events.emit({ type: 'router', message: `Tier 1 Success (${markdown.length} chars) -> ${finalUrl.substring(0, 30)}...` });
                 return { content: markdown, finalUrl };
@@ -47,9 +44,9 @@ export class RouterService {
             // Capture final URL from page object
             finalUrl = page.url();
             const content = await page.content();
-            const sanitizedHtml = ContentCleaner.sanitizeHtml(content);
+            // Payload Hygiene: Full HTML Pipeline (Sanitize -> Markdown -> Polish)
+            const markdown = ContentCleaner.cleanContent(content);
             await browser.close();
-            const markdown = this.turndown.turndown(sanitizedHtml);
             this.events.emit({ type: 'router', message: `Tier 2 Success (${markdown.length} chars) -> ${finalUrl.substring(0, 30)}...` });
             return { content: markdown, finalUrl };
         }

package/dist/api/services/TransmuteService.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import path from 'path';
 import os from 'os';
 import { SDKService } from './SDKService.js';
-import { ContentCleaner } from '../utils/contentCleaner.js';
+import { BLOCKED_TAGS as DEFAULT_BLOCKED_TAGS } from '../../shared/constants.js';
 export class TransmuteService {
     /**
      * Run a specific engine pipeline
@@ -119,8 +119,8 @@ export class TransmuteService {
                 summary: s.summary,
                 url: bestUrl, // Best Available (Resolved) URL
                 source_urls: uniqueUrls, // All associated direct URLs
-                // Use ContentCleaner to strip JS/CSS noise
-                content: s.content ? ContentCleaner.cleanContent(s.content) : undefined
+                // Return clean/raw content (already cleaned by RouterService)
+                content: s.content || undefined
             };
         });
         // 4. Construct high-fidelity System Prompt
@@ -169,16 +169,31 @@ export class TransmuteService {
      * Fetch relevant signals based on engine configuration
      */
     async fetchContextSignals(userId, config, supabase) {
+        const maxSignals = config.max_signals || 10;
         let query = supabase
             .from('signals')
             .select('*')
             .eq('user_id', userId)
             .order('score', { ascending: false })
-            .limit(10);
-        if (config.category && config.category !== 'All') {
-            query = query.eq('category', config.category);
+            .limit(maxSignals);
+        // Support both single category (legacy) and multi-select categories
+        const categories = Array.isArray(config.category) ? config.category : (config.category ? [config.category] : []);
+        if (categories.length > 0 && !categories.includes('All')) {
+            // Use OR logic for multiple categories
+            query = query.in('category', categories);
         }
-        const { data } = await query;
+        // HIGHLIGHT: Support tag-based filtering (Dynamic Tag Engines)
+        if (config.tag) {
+            const normalizedTag = config.tag.toLowerCase().trim();
+            console.log(`[Transmute] Filtering by tag: "${normalizedTag}" (original: "${config.tag}")`);
+            query = query.contains('tags', [normalizedTag]);
+        }
+        const { data, error } = await query;
+        if (error) {
+            console.error('[Transmute] Signal query failed:', error);
+            return [];
+        }
+        console.log(`[Transmute] Retrieved ${data?.length || 0} signals for user ${userId}`);
         return (data || []);
     }
     /**
@@ -222,6 +237,146 @@ export class TransmuteService {
         });
         return response.response?.content || "Failed to generate content.";
     }
+    /**
+     * Ensure default newsletter engines exist for each category
+     */
+    /**
+     * Ensure default newsletter engines exist for each category
+     * This is "Self-Healing": it creates missing engines based on discovered signals.
+     */
+    async ensureDefaultNewsletterEngines(userId, supabase) {
+        console.log(`[Transmute] Running Self-Healing Engine Discovery for user ${userId}...`);
+        // 1. Fetch Active Categories from Signals
+        // We only create engines for categories that actually have data.
+        const { data: signalStats } = await supabase
+            .from('signals')
+            .select('category')
+            .eq('user_id', userId);
+        const activeCategories = new Set(signalStats?.map(s => s.category).filter(Boolean) || []);
+        // 2. Fetch Existing Pipelines to avoid duplicates
+        const { data: existingEngines } = await supabase
+            .from('engines')
+            .select('title, config')
+            .eq('user_id', userId)
+            .eq('type', 'newsletter');
+        const existingTitles = new Set(existingEngines?.map(e => e.title) || []);
+        const existingCategories = new Set(existingEngines?.map(e => e.config?.category).filter(Boolean));
+        const existingTags = new Set(existingEngines?.map(e => e.config?.tag?.toLowerCase().trim()).filter(Boolean));
+        // 3. Create Engines for Active Categories (Exclude 'Other')
+        for (const category of activeCategories) {
+            // Filter out 'Other' (case-insensitive) and empty strings
+            if (!category || category.toLowerCase() === 'other')
+                continue;
+            // STRICT 1:1 CHECK: Skip if title exists OR category is already covered
+            const title = `${category} Daily`;
+            if (existingTitles.has(title) || existingCategories.has(category)) {
+                continue;
+            }
+            console.log(`[Transmute] Bootstrapping missing category engine: ${title}`);
+            const config = {
+                category,
+                execution_mode: 'desktop',
+                schedule: 'Daily',
+                llm_provider: 'realtimexai',
+                llm_model: 'gpt-4o',
+                max_signals: 30,
+                custom_prompt: `Create a comprehensive daily newsletter focused on ${category}. Highlight the most important developments, key insights, and actionable takeaways. Use a professional, insight-driven tone with clear structure: start with 'The Big Story' followed by 'Quick Hits' for other notable items.`
+            };
+            await supabase
+                .from('engines')
+                .insert({
+                user_id: userId,
+                title: title,
+                type: 'newsletter',
+                config: config,
+                status: 'active'
+            });
+        }
+        // 4. Dynamic Tag-Based Categories
+        // De-prioritized for now to reduce noise.
+        // Only run if the threshold is very high or if category mapping isn't enough.
+        // await this.ensureDynamicTagEngines(userId, supabase, existingTitles, existingTags);
+    }
+    /**
+     * Find popular tags and create engines for them
+     * Treat popular tags as "dynamic categories" with 1:1 mapping.
+     */
+    async ensureDynamicTagEngines(userId, supabase, existingTitles, existingTags) {
+        // 1. Fetch all tags for the user
+        const { data: signals } = await supabase
+            .from('signals')
+            .select('tags')
+            .eq('user_id', userId);
+        if (!signals || signals.length === 0)
+            return;
+        // 2. Fetch User Settings (for blocked tags)
+        const { data: settings } = await supabase
+            .from('alchemy_settings')
+            .select('blocked_tags')
+            .eq('user_id', userId)
+            .maybeSingle();
+        const userBlockedTags = new Set((settings?.blocked_tags || []).map(t => t.toLowerCase().trim()));
+        // Use user blocked tags if they exist, otherwise fallback to system defaults
+        const systemDefaults = Array.from(DEFAULT_BLOCKED_TAGS).map(t => t.toLowerCase().trim());
+        const BLOCKED_TAGS = userBlockedTags.size > 0 ? userBlockedTags : new Set(systemDefaults);
+        // Core category terms to skip (already handled by category discovery)
+        const CORE_CATEGORY_TERMS = new Set([
+            'ai', 'ml', 'machine learning', 'artificial intelligence',
+            'business', 'technology', 'tech', 'finance', 'financial',
+            'crypto', 'cryptocurrency', 'bitcoin', 'science', 'scientific',
+            'politics', 'political', 'government'
+        ]);
+        const tagCounts = new Map();
+        const DYNAMIC_THRESHOLD = 50; // Increased significantly to reduce noise for now
+        const MAX_NEW_PER_RUN = 3;
+        signals.forEach(s => {
+            const tags = (s.tags || []);
+            tags.forEach(tag => {
+                const lower = tag.toLowerCase().trim();
+                // Filter out: empty, short, blocked, or core category overlaps
+                // Added check for 'redirect' and other junk specifically
+                if (!lower || lower.length < 3 || BLOCKED_TAGS.has(lower) || CORE_CATEGORY_TERMS.has(lower) || lower.includes('redirect')) {
+                    return;
+                }
+                tagCounts.set(lower, (tagCounts.get(lower) || 0) + 1);
+            });
+        });
+        // 3. Identify and Sort Candidates
+        const candidates = Array.from(tagCounts.entries())
+            .filter(([_, count]) => count >= DYNAMIC_THRESHOLD)
+            .sort((a, b) => b[1] - a[1]);
+        let createdCount = 0;
+        for (const [tag, count] of candidates) {
+            const displayName = tag.split(' ').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' ');
+            const title = `${displayName} Daily`;
+            // STRICT 1:1 CHECK: Skip if title exists OR tag is already covered
+            if (existingTitles.has(title) || existingTags.has(tag)) {
+                continue;
+            }
+            console.log(`[Transmute] Creating dynamic tag engine for "${tag}" (${count} signals)`);
+            const config = {
+                tag,
+                execution_mode: 'desktop',
+                schedule: 'Daily',
+                llm_provider: 'realtimexai',
+                llm_model: 'gpt-4o'
+            };
+            const { error } = await supabase
+                .from('engines')
+                .insert({
+                user_id: userId,
+                title: title,
+                type: 'newsletter',
+                config: config,
+                status: 'active'
+            });
+            if (!error)
+                createdCount++;
+        }
+        if (createdCount > 0) {
+            console.log(`[Transmute] Successfully auto-created ${createdCount} dynamic tag engines.`);
+        }
+    }
     /**
      * Save the generated asset to DB
      */

package/dist/api/utils/DOMSanitizer.js ADDED Viewed

@@ -0,0 +1,62 @@
+import { JSDOM } from 'jsdom';
+import { Readability } from '@mozilla/readability';
+export class DOMSanitizer {
+    /**
+     * Sanitize HTML using proper DOM parsing (not regex).
+     * RETURNS: Cleaned HTML string (not Markdown, not Plain Text).
+     */
+    static sanitizeHtml(html) {
+        if (!html)
+            return "";
+        try {
+            // 1. Parse DOM
+            const dom = new JSDOM(html, { url: "https://example.com" });
+            const doc = dom.window.document;
+            // 2. Pre-clean: Remove Toxic Tags immediately
+            // We do this BEFORE Readability to ensure no scripts sneak in
+            const toxicSelectors = [
+                'script', 'style', 'noscript', 'svg', 'iframe', 'embed', 'object',
+                'meta', 'link', 'head' // We only want body content
+            ];
+            let removedCount = 0;
+            toxicSelectors.forEach(tag => {
+                const elements = doc.querySelectorAll(tag);
+                if (elements.length > 0) {
+                    console.log(`[DOMSanitizer] Removing ${elements.length} <${tag}> tags`);
+                    removedCount += elements.length;
+                    elements.forEach(el => el.remove());
+                }
+            });
+            console.log(`[DOMSanitizer] Removed ${removedCount} toxic elements total.`);
+            // 3. Try Readability (Best for Articles)
+            const reader = new Readability(doc);
+            const article = reader.parse();
+            if (article && article.content) {
+                // Return Clean HTML with structure preserved (not textContent)
+                return article.content;
+            }
+            // 4. Fallback: Manual Cleaning (If Readability fails)
+            // Remove UI Noise
+            const noiseSelectors = [
+                'header', 'footer', 'nav', 'aside', 'form',
+                '[role="alert"]', '[role="banner"]', '[role="dialog"]',
+                '.ad', '.ads', '.advertisement', '.social-share', '#cookie-banner'
+            ];
+            noiseSelectors.forEach(selector => {
+                try {
+                    doc.querySelectorAll(selector).forEach(el => el.remove());
+                }
+                catch (e) {
+                    // Ignore selector errors
+                }
+            });
+            // Return whatever is left in the body as HTML
+            return doc.body.innerHTML;
+        }
+        catch (error) {
+            console.error('[DOMSanitizer] Parsing failed, falling back to regex strip', error);
+            // Emergency fallback: Strip script/style manually, return text
+            return html.replace(/<(script|style)[^>]*>[\s\S]*?<\/\1>/gi, '');
+        }
+    }
+}