@realtimex/realtimex-alchemy 1.0.39 → 1.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/CHANGELOG.md CHANGED
@@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.0.41] - 2026-01-24
9
+
10
+ ### Added
11
+ - **Active Learning**: Implemented "Boost-to-Embed" workflow. Boosting a signal in the UI now triggers the backend to generate a vector embedding for that signal, actively improving the AI's future context retrieval.
12
+ - **Deduplication**: Enhanced deduplication engine with Title-based heuristics to catch duplicates across different URLs (e.g., tracking links) and added explicit exact-URL matching.
13
+
14
+ ### Improved
15
+ - **Data Retention**: The Alchemist engine now persists *all* mined signals, including low-scoring ones (marked as auto-dismissed). This allows users to review and rescue missed opportunities via the new System Logs filters.
16
+ - **RAG**: Tuned chat retrieval parameters (lowered the similarity threshold from 0.75 to 0.55 and increased the result limit from 5 to 10) to significantly improve the recall of relevant historical context during conversations.
17
+ - **UI**: Connected the System Logs "Boost" action to the new API endpoint, ensuring immediate feedback loops for the AI.
18
+
19
+ ## [1.0.40] - 2026-01-24
20
+
21
+ ### Improved
22
+ - **UI**: Standardized the styling of disabled input fields in Account Settings (Email field) to match the new `bg-black/5` convention, ensuring consistent visual language and readability across the app.
23
+
8
24
  ## [1.0.39] - 2026-01-24
9
25
 
10
26
  ### Fixed
package/dist/api/index.js CHANGED
@@ -357,6 +357,62 @@ app.post('/api/chat/message', async (req, res) => {
357
357
  res.status(500).json({ success: false, message: e.message });
358
358
  }
359
359
  });
360
+ // Active Learning: Boost Signal & Trigger Embedding
361
+ app.post('/api/signals/:id/boost', async (req, res) => {
362
+ try {
363
+ const userId = req.headers['x-user-id'];
364
+ const signalId = req.params.id;
365
+ const { value, settings: bodySettings } = req.body; // value = true/false
366
+ console.log(`[API] Boost Request for ${signalId}: Value=${value}, User=${userId ? 'Present' : 'Missing'}`);
367
+ if (!userId) {
368
+ console.warn('[API] Boost failed: Missing x-user-id header');
369
+ return res.status(401).json({ error: 'Unauthorized: Missing User ID' });
370
+ }
371
+ const supabase = getAuthenticatedSupabase(req);
372
+ // 1. Update Signal in DB
373
+ const { data: signal, error } = await supabase
374
+ .from('signals')
375
+ .update({
376
+ is_boosted: value,
377
+ // If boosting, also un-dismiss it if it was dismissed
378
+ ...(value ? { is_dismissed: false } : {})
379
+ })
380
+ .eq('id', signalId)
381
+ .select()
382
+ .single();
383
+ if (error)
384
+ throw error;
385
+ // 2. If Boosting, Check/Generate Embedding
386
+ if (value === true && signal) {
387
+ // Fetch settings if not provided
388
+ let settings = bodySettings;
389
+ if (!settings || Object.keys(settings).length === 0) {
390
+ const { data: dbSettings } = await supabase
391
+ .from('alchemy_settings')
392
+ .select('*')
393
+ .eq('user_id', userId)
394
+ .single();
395
+ settings = dbSettings || {};
396
+ }
397
+ // Only generate an embedding if the SDK service is available
398
+ if (await SDKService.isAvailable()) {
399
+ console.log(`[API] Triggering Active Learning embedding for boosted signal: ${signal.title}`);
400
+ // processEmbedding is a private method of AlchemistService, so it is not
401
+ // meant to be called from here; doing so needs a small hack or a public wrapper.
402
+ // For now, we reuse the existing embedding logic as-is.
403
+ // BETTER: Add a public method to AlchemistService, e.g. 'promoteToRelevant(signal, settings)'.
404
+ // Until that exists, the private method is called via an 'any' cast for expediency;
405
+ // a cleaner architecture would expose AlchemistService.promoteSignal() instead.
406
+ await alchemist.processEmbedding(signal, settings, userId, supabase);
407
+ }
408
+ }
409
+ res.json({ success: true, signal });
410
+ }
411
+ catch (e) {
412
+ console.error('[API] Boost failed:', e);
413
+ res.status(500).json({ success: false, message: e.message });
414
+ }
415
+ });
360
416
  // Unified Static Assets Serving
361
417
  const staticPath = process.env.ELECTRON_STATIC_PATH || path.join(__dirname, '..', '..', 'dist');
362
418
  if (fs.existsSync(staticPath)) {
@@ -130,40 +130,43 @@ export class AlchemistService {
130
130
  // 3. LLM Analysis
131
131
  const response = await this.analyzeContent(content, entry.url, settings, learningContext);
132
132
  const duration = Date.now() - startAnalysis;
133
- if (response.relevant) {
134
- // Emit: Signal Found
135
- await this.processingEvents.log({
136
- eventType: 'action',
137
- agentState: 'Signal',
138
- message: `Found signal: ${response.summary} (${response.score}%)`,
139
- level: 'info',
140
- metadata: response,
141
- durationMs: duration,
142
- userId
143
- }, supabase);
144
- // 4. Save Signal
145
- console.log('[AlchemistService] Saving signal to database...');
146
- const { data: insertedSignal, error: insertError } = await supabase
147
- .from('signals')
148
- .insert([{
149
- user_id: userId,
150
- url: entry.url,
151
- title: entry.title,
152
- score: response.score,
153
- summary: response.summary,
154
- category: response.category,
155
- entities: response.entities,
156
- tags: response.tags,
157
- content: content
158
- }])
159
- .select()
160
- .single();
161
- if (insertError) {
162
- console.error('[AlchemistService] Insert error:', insertError);
163
- stats.errors++;
164
- }
165
- else {
166
- console.log('[AlchemistService] Signal saved successfully');
133
+ // 4. Save Signal (ALWAYS save for Active Learning - Low scores = candidates for boost)
134
+ console.log(`[AlchemistService] Saving signal (${response.score}%)...`);
135
+ const { data: insertedSignal, error: insertError } = await supabase
136
+ .from('signals')
137
+ .insert([{
138
+ user_id: userId,
139
+ url: entry.url,
140
+ title: entry.title,
141
+ score: response.score,
142
+ summary: response.summary,
143
+ category: response.category,
144
+ entities: response.entities,
145
+ tags: response.tags,
146
+ content: content,
147
+ // Mark as dismissed if low score so it doesn't clutter main feed,
148
+ // but is available in "Low" filter
149
+ is_dismissed: response.score < 50
150
+ }])
151
+ .select()
152
+ .single();
153
+ if (insertError) {
154
+ console.error('[AlchemistService] Insert error:', insertError);
155
+ stats.errors++;
156
+ }
157
+ else {
158
+ console.log('[AlchemistService] Signal saved successfully');
159
+ if (response.relevant) {
160
+ // High/Medium Score: Emit Signal Found & Auto-Embed
161
+ await this.processingEvents.log({
162
+ eventType: 'action',
163
+ agentState: 'Signal',
164
+ message: `Found signal: ${response.summary} (${response.score}%)`,
165
+ level: 'info',
166
+ metadata: response,
167
+ durationMs: duration,
168
+ userId
169
+ }, supabase);
167
170
  stats.signals++;
168
171
  // 5. Generate Embedding & Check for Duplicates (non-blocking)
169
172
  if (settings.embedding_model && await embeddingService.isAvailable()) {
@@ -172,18 +175,24 @@ export class AlchemistService {
172
175
  });
173
176
  }
174
177
  }
175
- }
176
- else {
177
- // Emit: Skipped
178
- await this.processingEvents.log({
179
- eventType: 'info',
180
- agentState: 'Skipped',
181
- message: `Irrelevant content (${response.score}%): ${entry.title}`,
182
- level: 'debug',
183
- durationMs: duration,
184
- userId
185
- }, supabase);
186
- stats.skipped++;
178
+ else {
179
+ // Low Score: Emit Skipped (but it IS saved in DB now)
180
+ // Trigger metadata-based deduplication (no embedding) to merge tracking links/redirects
181
+ this.processDeduplicationOnly(insertedSignal, settings, userId, supabase).catch((err) => {
182
+ console.error('[AlchemistService] Deduplication check failed:', err);
183
+ });
184
+ await this.processingEvents.log({
185
+ eventType: 'info',
186
+ agentState: 'Skipped',
187
+ message: `Low signal stored for review (${response.score}%): ${entry.title}`,
188
+ level: 'debug',
189
+ durationMs: duration,
190
+ userId
191
+ }, supabase);
192
+ // We count it as 'skipped' for the summary stats even though it's physically in the DB,
193
+ // because it's not a "Found Signal" in the user's main feed context.
194
+ stats.skipped++;
195
+ }
187
196
  }
188
197
  }
189
198
  catch (error) {
@@ -316,6 +325,23 @@ export class AlchemistService {
316
325
  return { score: 0, summary: 'Failed to parse', category: 'Error', entities: [], tags: [], relevant: false };
317
326
  }
318
327
  }
328
+ /**
329
+ * Process deduplication without generating embedding (Metadata only)
330
+ * Used for low-score signals to merge duplicates based on Title/URL
331
+ */
332
+ async processDeduplicationOnly(signal, settings, userId, supabase) {
333
+ // Check for duplicates using null embedding (forces metadata check)
334
+ const dedupeResult = await deduplicationService.checkAndMergeDuplicate(signal, null, // No embedding
335
+ userId, supabase, settings);
336
+ if (dedupeResult.isDuplicate) {
337
+ console.log(`[AlchemistService] Low-score signal is duplicate, merged into: ${dedupeResult.mergedSignalId}`);
338
+ // Delete the newly inserted signal since it's a duplicate
339
+ await supabase
340
+ .from('signals')
341
+ .delete()
342
+ .eq('id', signal.id);
343
+ }
344
+ }
319
345
  /**
320
346
  * Process embedding generation and deduplication for a signal
321
347
  * This runs asynchronously and doesn't block the main mining pipeline
@@ -64,9 +64,10 @@ export class ChatService {
64
64
  let sources = [];
65
65
  // 3. Retrieve Context (if embedding checks out)
66
66
  if (queryEmbedding) {
67
- const similar = await embeddingService.findSimilarSignals(queryEmbedding, userId, 0.75, // Threshold
68
- 5 // Top K
67
+ const similar = await embeddingService.findSimilarSignals(queryEmbedding, userId, 0.55, // Lowered threshold for better recall
68
+ 10 // Increased Top K
69
69
  );
70
+ console.log(`[ChatService] RAG Retrieval: Found ${similar.length} signals for query: "${content}"`);
70
71
  if (similar.length > 0) {
71
72
  sources = similar.map(s => ({
72
73
  id: s.id,
@@ -16,22 +16,50 @@ export class DeduplicationService {
16
16
  */
17
17
  async checkAndMergeDuplicate(signal, embedding, userId, supabase, settings) {
18
18
  try {
19
- // Find similar signals
20
- const similar = await embeddingService.findSimilarSignals(embedding, userId, this.SIMILARITY_THRESHOLD, 5 // Check top 5 matches
21
- );
22
- if (similar.length === 0) {
23
- return { isDuplicate: false };
19
+ // 1. Semantic Check (if embedding exists)
20
+ if (embedding && embedding.length > 0) {
21
+ const similar = await embeddingService.findSimilarSignals(embedding, userId, this.SIMILARITY_THRESHOLD, 5 // Check top 5 matches
22
+ );
23
+ if (similar.length > 0) {
24
+ const bestMatch = similar[0];
25
+ console.log(`[Deduplication] Found semantic duplicate: ${bestMatch.id} (score: ${bestMatch.score})`);
26
+ const mergedId = await this.mergeSignals(bestMatch.id, signal, userId, supabase, settings);
27
+ return { isDuplicate: true, mergedSignalId: mergedId, similarityScore: bestMatch.score };
28
+ }
24
29
  }
25
- // Get the most similar signal
26
- const bestMatch = similar[0];
27
- console.log(`[Deduplication] Found similar signal: ${bestMatch.id} (score: ${bestMatch.score})`);
28
- // Merge signals
29
- const mergedId = await this.mergeSignals(bestMatch.id, signal, userId, supabase, settings);
30
- return {
31
- isDuplicate: true,
32
- mergedSignalId: mergedId,
33
- similarityScore: bestMatch.score
34
- };
30
+ // 2. Title Match Check (Metadata Heuristic)
31
+ // Useful for redirected URLs or tracking links where content is same but URL differs
32
+ if (signal.title && signal.title.length > 10) {
33
+ const { data: titleMatch } = await supabase
34
+ .from('signals')
35
+ .select('id, score, title')
36
+ .eq('user_id', userId)
37
+ .ilike('title', signal.title.trim()) // Case-insensitive match
38
+ .neq('id', signal.id || '00000000-0000-0000-0000-000000000000') // Don't match self
39
+ .order('created_at', { ascending: false })
40
+ .limit(1)
41
+ .maybeSingle();
42
+ if (titleMatch) {
43
+ console.log(`[Deduplication] Found title match: ${titleMatch.id} ("${titleMatch.title}")`);
44
+ const mergedId = await this.mergeSignals(titleMatch.id, signal, userId, supabase, settings);
45
+ return { isDuplicate: true, mergedSignalId: mergedId, similarityScore: 0.95 }; // High confidence
46
+ }
47
+ }
48
+ // 3. Exact URL Check (Fallback for signals without embeddings)
49
+ // Even if the semantic check failed (or was skipped), we shouldn't save the exact same URL twice.
50
+ const { data: existingUrlMatch } = await supabase
51
+ .from('signals')
52
+ .select('id, score')
53
+ .eq('user_id', userId)
54
+ .eq('url', signal.url)
55
+ .neq('id', signal.id || '00000000-0000-0000-0000-000000000000') // Don't match self
56
+ .maybeSingle();
57
+ if (existingUrlMatch) {
58
+ console.log(`[Deduplication] Found exact URL match: ${existingUrlMatch.id}`);
59
+ const mergedId = await this.mergeSignals(existingUrlMatch.id, signal, userId, supabase, settings);
60
+ return { isDuplicate: true, mergedSignalId: mergedId, similarityScore: 1.0 };
61
+ }
62
+ return { isDuplicate: false };
35
63
  }
36
64
  catch (error) {
37
65
  console.error('[Deduplication] Error:', error.message);