@realtimex/realtimex-alchemy 1.0.23 → 1.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
1
+ import axios from 'axios';
2
/**
 * RealTimeX SDK Client
 * Thin HTTP wrapper around the RealTimeX Desktop `/sdk/llm/*` endpoints
 * (embeddings, vector storage, provider listing).
 *
 * Every endpoint is expected to answer with a JSON envelope that carries a
 * boolean `success` flag and, on failure, an `error` message. A falsy
 * `success` is turned into a thrown `Error`.
 */
export class RealTimeXSDK {
    client;
    appId;
    baseUrl;
    /**
     * @param baseUrl - Base URL of the RealTimeX Desktop API
     * @param timeout - Per-request timeout in milliseconds (default 30 seconds)
     */
    constructor(baseUrl = 'http://localhost:3001', timeout = 30000) {
        this.baseUrl = baseUrl;
        // RTX_APP_ID is injected by RealTimeX when starting the app
        this.appId = process.env.RTX_APP_ID || '';
        this.client = axios.create({
            baseURL: this.baseUrl,
            headers: {
                'X-App-Id': this.appId,
                'Content-Type': 'application/json'
            },
            timeout
        });
    }
    /**
     * Shared request path for every endpoint: send the request, validate the
     * `success` envelope, log failures with a per-endpoint label and rethrow.
     * @param method - 'get' or 'post'
     * @param path - Endpoint path relative to the base URL
     * @param payload - JSON body for POST requests (ignored for GET)
     * @param label - Human-readable operation name used in the error log
     * @param fallbackMessage - Error message used when the server gives none
     * @returns The response body (`response.data`)
     */
    async #request(method, path, payload, label, fallbackMessage) {
        try {
            const response = method === 'get'
                ? await this.client.get(path)
                : await this.client.post(path, payload);
            const body = response?.data;
            // An empty body is treated like an explicit failure instead of
            // surfacing as an unrelated TypeError.
            if (!body?.success) {
                throw new Error(body?.error || fallbackMessage);
            }
            return body;
        }
        catch (error) {
            console.error(`[RealTimeXSDK] ${label} error:`, error.message);
            throw error;
        }
    }
    /**
     * Generate embeddings for text inputs
     * @param input - Single string or array of strings to embed
     * @param provider - Provider name (realtimexai, openai, gemini)
     * @param model - Model name (e.g., text-embedding-3-small)
     * @returns Array of embedding vectors (empty array if the server returned none)
     */
    async generateEmbedding(input, provider = 'realtimexai', model = 'text-embedding-3-small') {
        const body = await this.#request('post', '/sdk/llm/embed', {
            input: Array.isArray(input) ? input : [input],
            provider,
            model
        }, 'Embedding', 'Embedding generation failed');
        // Same fallback the other list-returning methods use.
        return body.embeddings || [];
    }
    /**
     * Store vectors in RealTimeX managed vector storage
     * @param vectors - Array of vectors with metadata
     * @param workspaceId - Workspace/namespace for isolation
     */
    async upsertVectors(vectors, workspaceId = 'default') {
        await this.#request('post', '/sdk/llm/vectors/upsert', {
            vectors,
            workspaceId
        }, 'Vector upsert', 'Vector upsert failed');
    }
    /**
     * Query vectors for semantic similarity search
     * @param queryVector - Query embedding vector
     * @param topK - Number of results to return
     * @param workspaceId - Workspace/namespace to search in
     * @param filter - Optional metadata filter
     * @returns Array of similar vectors with scores
     */
    async queryVectors(queryVector, topK = 10, workspaceId = 'default', filter) {
        const body = await this.#request('post', '/sdk/llm/vectors/query', {
            vector: queryVector,
            topK,
            workspaceId,
            filter
        }, 'Vector query', 'Vector query failed');
        return body.results || [];
    }
    /**
     * Delete vectors from storage
     * @param workspaceId - Workspace to delete from
     * @param deleteAll - If true, deletes all vectors in workspace
     */
    async deleteVectors(workspaceId, deleteAll = false) {
        await this.#request('post', '/sdk/llm/vectors/delete', {
            workspaceId,
            deleteAll
        }, 'Vector delete', 'Vector deletion failed');
    }
    /**
     * List available LLM and embedding providers
     * @returns Provider configuration (the full response body)
     */
    async getProviders() {
        return this.#request('get', '/sdk/llm/providers', undefined, 'Get providers', 'Failed to fetch providers');
    }
    /**
     * List all vector storage workspaces
     * @returns Array of workspace IDs
     */
    async listVectorWorkspaces() {
        const body = await this.#request('get', '/sdk/llm/vectors/workspaces', undefined, 'List workspaces', 'Failed to list workspaces');
        return body.workspaces || [];
    }
    /**
     * Check if SDK is properly configured
     * @returns True if app ID is set
     */
    isConfigured() {
        return !!this.appId;
    }
    /**
     * Get current app ID
     * @returns App ID or empty string
     */
    getAppId() {
        return this.appId;
    }
}
162
+ // Shared singleton: constructed once at module load with the default base URL; RTX_APP_ID is read from the environment at that moment
163
+ export const realtimeXSDK = new RealTimeXSDK();
@@ -1,6 +1,8 @@
1
1
  import OpenAI from 'openai';
2
2
  import { ProcessingEventService } from './ProcessingEventService.js';
3
3
  import { RouterService } from './RouterService.js';
4
+ import { embeddingService } from './EmbeddingService.js';
5
+ import { deduplicationService } from './DeduplicationService.js';
4
6
  export class AlchemistService {
5
7
  processingEvents;
6
8
  router;
@@ -86,7 +88,9 @@ export class AlchemistService {
86
88
  }, supabase);
87
89
  // 4. Save Signal
88
90
  console.log('[AlchemistService] Saving signal to database...');
89
- const { error: insertError } = await supabase.from('signals').insert([{
91
+ const { data: insertedSignal, error: insertError } = await supabase
92
+ .from('signals')
93
+ .insert([{
90
94
  user_id: userId,
91
95
  url: entry.url,
92
96
  title: entry.title,
@@ -96,7 +100,9 @@ export class AlchemistService {
96
100
  entities: response.entities,
97
101
  tags: response.tags,
98
102
  content: content
99
- }]);
103
+ }])
104
+ .select()
105
+ .single();
100
106
  if (insertError) {
101
107
  console.error('[AlchemistService] Insert error:', insertError);
102
108
  stats.errors++;
@@ -104,6 +110,12 @@ export class AlchemistService {
104
110
  else {
105
111
  console.log('[AlchemistService] Signal saved successfully');
106
112
  stats.signals++;
113
+ // 5. Generate Embedding & Check for Duplicates (non-blocking)
114
+ if (settings.embedding_model && await embeddingService.isAvailable()) {
115
+ this.processEmbedding(insertedSignal, settings, userId, supabase).catch((err) => {
116
+ console.error('[AlchemistService] Embedding processing failed:', err);
117
+ });
118
+ }
107
119
  }
108
120
  }
109
121
  else {
@@ -238,4 +250,52 @@ export class AlchemistService {
238
250
  return { score: 0, summary: 'Failed to parse', category: 'Error', entities: [], tags: [], relevant: false };
239
251
  }
240
252
  }
253
+ /**
254
+ * Process embedding generation and deduplication for a signal
255
+ * This runs asynchronously and doesn't block the main mining pipeline
256
+ */
257
+ async processEmbedding(signal, settings, userId, supabase) {
258
+ try {
259
+ console.log('[AlchemistService] Generating embedding for signal:', signal.id);
260
+ // Generate embedding
261
+ const text = `${signal.title} ${signal.summary}`;
262
+ const embedding = await embeddingService.generateEmbedding(text, settings);
263
+ if (!embedding) {
264
+ console.warn('[AlchemistService] Embedding generation returned null, skipping');
265
+ return;
266
+ }
267
+ // Check for duplicates
268
+ const dedupeResult = await deduplicationService.checkAndMergeDuplicate(signal, embedding, userId, supabase);
269
+ if (dedupeResult.isDuplicate) {
270
+ console.log(`[AlchemistService] Signal is duplicate, merged into: ${dedupeResult.mergedSignalId}`);
271
+ // Delete the newly inserted signal since it's a duplicate
272
+ await supabase
273
+ .from('signals')
274
+ .delete()
275
+ .eq('id', signal.id);
276
+ return;
277
+ }
278
+ // Store embedding in RealTimeX vector storage
279
+ await embeddingService.storeSignalEmbedding(signal.id, embedding, {
280
+ title: signal.title,
281
+ summary: signal.summary,
282
+ url: signal.url,
283
+ category: signal.category,
284
+ userId
285
+ });
286
+ // Update signal metadata
287
+ await supabase
288
+ .from('signals')
289
+ .update({
290
+ has_embedding: true,
291
+ embedding_model: settings.embedding_model
292
+ })
293
+ .eq('id', signal.id);
294
+ console.log('[AlchemistService] Embedding processed successfully for signal:', signal.id);
295
+ }
296
+ catch (error) {
297
+ console.error('[AlchemistService] Embedding processing error:', error.message);
298
+ // Don't throw - we don't want to fail the entire mining process
299
+ }
300
+ }
241
301
  }
@@ -0,0 +1,146 @@
1
+ import { embeddingService } from './EmbeddingService.js';
2
/**
 * Deduplication Service
 * Handles smart signal merging based on semantic similarity
 */
export class DeduplicationService {
    SIMILARITY_THRESHOLD = 0.85;
    /**
     * Check if signal is a duplicate and merge if necessary.
     *
     * `isDuplicate: true` is reported only when the merge was actually
     * written to the existing signal. The caller deletes the new signal on a
     * duplicate verdict, so a failed merge must not count as one.
     *
     * @param signal - New signal to check
     * @param embedding - Signal embedding vector
     * @param userId - User ID
     * @param supabase - Supabase client
     * @returns Deduplication result: `{ isDuplicate, mergedSignalId?, similarityScore? }`
     */
    async checkAndMergeDuplicate(signal, embedding, userId, supabase) {
        try {
            // Find similar signals
            const similar = await embeddingService.findSimilarSignals(embedding, userId, this.SIMILARITY_THRESHOLD, 5 // Check top 5 matches
            );
            // A signal must never be merged into itself (e.g. when it is
            // re-processed after its own embedding has been stored).
            const candidates = (similar || []).filter((match) => String(match.id) !== String(signal.id));
            if (candidates.length === 0) {
                return { isDuplicate: false };
            }
            // Pick the highest score explicitly rather than trusting result order
            const bestMatch = candidates.reduce((best, match) => (match.score > best.score ? match : best), candidates[0]);
            console.log(`[Deduplication] Found similar signal: ${bestMatch.id} (score: ${bestMatch.score})`);
            // Merge signals
            const merged = await this.#applyMerge(bestMatch.id, signal, userId, supabase);
            if (!merged) {
                return { isDuplicate: false };
            }
            return {
                isDuplicate: true,
                mergedSignalId: bestMatch.id,
                similarityScore: bestMatch.score
            };
        }
        catch (error) {
            console.error('[Deduplication] Error:', error.message);
            return { isDuplicate: false };
        }
    }
    /**
     * Merge new signal into existing signal
     * @param existingSignalId - ID of existing signal
     * @param newSignal - New signal to merge
     * @param userId - User ID
     * @param supabase - Supabase client
     * @returns Merged signal ID (always `existingSignalId`; failures are logged)
     */
    async mergeSignals(existingSignalId, newSignal, userId, supabase) {
        await this.#applyMerge(existingSignalId, newSignal, userId, supabase);
        return existingSignalId;
    }
    /**
     * Fetch the existing signal and write the merged values back to it.
     * @returns True when the update was written, false when the existing
     *          signal could not be fetched or the update failed
     */
    async #applyMerge(existingSignalId, newSignal, userId, supabase) {
        // Fetch existing signal
        const { data: existing, error } = await supabase
            .from('signals')
            .select('*')
            .eq('id', existingSignalId)
            .eq('user_id', userId)
            .single();
        if (error || !existing) {
            console.error('[Deduplication] Failed to fetch existing signal:', error);
            return false;
        }
        // Calculate boosted score: +0.1 per mention, at most +0.5 points per merge
        const mentionCount = (existing.mention_count || 1) + 1;
        const scoreBoost = Math.min(mentionCount * 0.1, 0.5);
        const newScore = Math.min((Number(existing.score) || 0) + scoreBoost, 10); // Cap at 10
        // Combine summaries (use longer one)
        const combinedSummary = this.combineSummaries(existing.summary, newSignal.summary);
        // Track source URLs in metadata (unique, without empty entries)
        const existingUrls = existing.metadata?.source_urls || [existing.url];
        const sourceUrls = [...new Set([...existingUrls, newSignal.url])].filter((url) => url != null);
        // One timestamp so last_seen and updated_at cannot drift apart
        const now = new Date().toISOString();
        // Update existing signal, scoped to the same user as the fetch above
        const { error: updateError } = await supabase
            .from('signals')
            .update({
                score: newScore,
                summary: combinedSummary,
                mention_count: mentionCount,
                metadata: {
                    ...existing.metadata,
                    source_urls: sourceUrls,
                    last_seen: now,
                    duplicate_count: mentionCount - 1
                },
                updated_at: now
            })
            .eq('id', existingSignalId)
            .eq('user_id', userId);
        if (updateError) {
            console.error('[Deduplication] Failed to update signal:', updateError);
            return false;
        }
        console.log(`[Deduplication] Merged signal ${newSignal.id} into ${existingSignalId} (mentions: ${mentionCount}, score: ${newScore})`);
        return true;
    }
    /**
     * Combine two summaries
     * @param existing - Existing summary
     * @param newSummary - New summary
     * @returns The longer summary (the existing one on a tie); if one side is
     *          missing or empty, the other side
     */
    combineSummaries(existing, newSummary) {
        // Simple strategy: use the longer summary
        // TODO: In future, use LLM to intelligently merge summaries
        if (!newSummary) {
            return existing;
        }
        if (!existing) {
            return newSummary;
        }
        return existing.length >= newSummary.length ? existing : newSummary;
    }
    /**
     * Get deduplication statistics for a user
     * @param userId - User ID
     * @param supabase - Supabase client
     * @returns Statistics object: `{ totalSignals, mergedSignals, deduplicationRate }`
     *          (all zeros when either count query fails)
     */
    async getStats(userId, supabase) {
        try {
            // The two counts are independent, so run them in parallel
            const [totalResult, mergedResult] = await Promise.all([
                // Count total signals
                supabase
                    .from('signals')
                    .select('*', { count: 'exact', head: true })
                    .eq('user_id', userId),
                // Count merged signals (mention_count > 1)
                supabase
                    .from('signals')
                    .select('*', { count: 'exact', head: true })
                    .eq('user_id', userId)
                    .gt('mention_count', 1)
            ]);
            // Supabase returns query failures instead of throwing them
            const queryError = totalResult.error || mergedResult.error;
            if (queryError) {
                throw new Error(queryError.message || 'count query failed');
            }
            const totalSignals = totalResult.count || 0;
            const mergedSignals = mergedResult.count || 0;
            return {
                totalSignals,
                mergedSignals,
                deduplicationRate: totalSignals ? mergedSignals / totalSignals : 0
            };
        }
        catch (error) {
            console.error('[Deduplication] Failed to get stats:', error.message);
            return {
                totalSignals: 0,
                mergedSignals: 0,
                deduplicationRate: 0
            };
        }
    }
}
145
+ // Shared singleton instance; AlchemistService imports this binding
146
+ export const deduplicationService = new DeduplicationService();
@@ -0,0 +1,169 @@
1
/**
 * Embedding Service using RealTimeX SDK
 * Provides simplified interface for embedding generation and vector operations
 * Gracefully degrades if SDK is not available
 */
export class EmbeddingService {
    WORKSPACE_ID = 'alchemy-signals';
    SIMILARITY_THRESHOLD = 0.85;
    sdk = null;
    sdkLoadAttempted = false;
    // In-flight (or finished) SDK import, shared by all callers
    #sdkLoading = null;
    /**
     * Lazy load SDK - only loads when first needed.
     *
     * The import is started once and its promise is shared, so callers that
     * arrive while the import is still in flight wait for it instead of
     * seeing the not-yet-assigned `null`.
     * @returns The SDK singleton, or null if it could not be imported
     */
    async loadSDK() {
        // `sdk` / `sdkLoadAttempted` were set directly, not via an import
        if (this.sdkLoadAttempted && this.#sdkLoading === null) {
            return this.sdk;
        }
        if (this.#sdkLoading === null) {
            this.#sdkLoading = this.#importSDK();
        }
        return this.#sdkLoading;
    }
    /**
     * Perform the one-time dynamic import of the SDK module.
     * @returns The SDK singleton, or null if the import failed
     */
    async #importSDK() {
        this.sdkLoadAttempted = true;
        try {
            const sdkModule = await import('../lib/realtimex-sdk.js');
            this.sdk = sdkModule.realtimeXSDK;
            console.log('[EmbeddingService] RealTimeX SDK loaded successfully');
        }
        catch (error) {
            console.warn('[EmbeddingService] RealTimeX SDK not available - embedding features disabled');
            this.sdk = null;
        }
        return this.sdk;
    }
    /**
     * Generate embedding for a single text
     * @param text - Text to embed
     * @param settings - Alchemy settings with embedding configuration
     * @returns Embedding vector or null if failed
     */
    async generateEmbedding(text, settings) {
        try {
            const sdk = await this.loadSDK();
            if (!sdk || !sdk.isConfigured()) {
                console.warn('[EmbeddingService] RealTimeX SDK not available');
                return null;
            }
            const provider = this.getProvider(settings);
            const model = settings?.embedding_model || 'text-embedding-3-small';
            const embeddings = await sdk.generateEmbedding(text, provider, model);
            return embeddings?.[0] ?? null;
        }
        catch (error) {
            console.error('[EmbeddingService] Generation failed:', error.message);
            return null;
        }
    }
    /**
     * Generate embeddings for multiple texts (batch)
     * @param texts - Array of texts to embed
     * @param settings - Alchemy settings
     * @returns Array of embedding vectors or null if failed
     */
    async generateEmbeddings(texts, settings) {
        try {
            const sdk = await this.loadSDK();
            if (!sdk || !sdk.isConfigured()) {
                console.warn('[EmbeddingService] RealTimeX SDK not configured');
                return null;
            }
            const provider = this.getProvider(settings);
            const model = settings?.embedding_model || 'text-embedding-3-small';
            return await sdk.generateEmbedding(texts, provider, model);
        }
        catch (error) {
            console.error('[EmbeddingService] Batch generation failed:', error.message);
            return null;
        }
    }
    /**
     * Store signal embedding in RealTimeX vector storage
     * @param signalId - Unique signal ID
     * @param embedding - Embedding vector
     * @param metadata - Signal metadata
     * @throws If the SDK is unavailable or the upsert fails
     */
    async storeSignalEmbedding(signalId, embedding, metadata) {
        try {
            const sdk = await this.loadSDK();
            if (!sdk) {
                throw new Error('SDK not available');
            }
            await sdk.upsertVectors([{
                    id: signalId,
                    vector: embedding,
                    metadata
                }], this.WORKSPACE_ID);
            console.log('[EmbeddingService] Stored embedding for signal:', signalId);
        }
        catch (error) {
            console.error('[EmbeddingService] Storage failed:', error.message);
            throw error;
        }
    }
    /**
     * Find similar signals using semantic search
     * @param queryEmbedding - Query embedding vector
     * @param userId - User ID for filtering
     * @param threshold - Similarity threshold (0-1)
     * @param limit - Max results
     * @returns Similar signals with score >= threshold, best match first
     *          (empty array when the SDK is unavailable or the query fails)
     */
    async findSimilarSignals(queryEmbedding, userId, threshold = this.SIMILARITY_THRESHOLD, limit = 10) {
        try {
            const sdk = await this.loadSDK();
            if (!sdk) {
                return [];
            }
            const results = await sdk.queryVectors(queryEmbedding, limit, this.WORKSPACE_ID, { userId } // Filter by user
            );
            // Filter by similarity threshold, then order best-first so that
            // element 0 is the most similar signal.
            return (results || [])
                .filter((r) => r.score >= threshold)
                .sort((a, b) => b.score - a.score);
        }
        catch (error) {
            console.error('[EmbeddingService] Similarity search failed:', error.message);
            return [];
        }
    }
    /**
     * Delete all embeddings for a user.
     * Currently a no-op that only logs a warning.
     * @param userId - User ID
     */
    async deleteUserEmbeddings(userId) {
        // Note: Current SDK only supports deleteAll
        // In future, we may need user-specific workspaces
        console.warn('[EmbeddingService] User-specific deletion not yet supported');
    }
    /**
     * Determine provider from settings
     * @param settings - Alchemy settings
     * @returns Provider name ('openai', 'gemini' or 'realtimexai')
     */
    getProvider(settings) {
        // If embedding_base_url is not set, use realtimexai (default)
        const baseUrl = settings?.embedding_base_url;
        if (!baseUrl) {
            return 'realtimexai';
        }
        // Detect provider from base URL
        const url = String(baseUrl).toLowerCase();
        if (url.includes('openai')) {
            return 'openai';
        }
        if (url.includes('google') || url.includes('gemini')) {
            return 'gemini';
        }
        // Default to realtimexai
        return 'realtimexai';
    }
    /**
     * Check if embedding service is available
     * @returns True if SDK is configured and available
     */
    async isAvailable() {
        const sdk = await this.loadSDK();
        return sdk !== null && sdk.isConfigured();
    }
}
168
+ // Shared singleton instance; imported by AlchemistService and DeduplicationService
169
+ export const embeddingService = new EmbeddingService();