@realtimex/realtimex-alchemy 1.0.23 → 1.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/lib/realtimex-sdk.js +163 -0
- package/dist/api/services/AlchemistService.js +62 -2
- package/dist/api/services/DeduplicationService.js +146 -0
- package/dist/api/services/EmbeddingService.js +169 -0
- package/dist/api/services/MinerService.js +103 -78
- package/dist/assets/index-BMTB9X3D.css +1 -0
- package/dist/assets/{index-D-WWw08L.js → index-CkJ4-_9k.js} +38 -30
- package/dist/index.html +2 -2
- package/package.json +1 -1
- package/dist/assets/index-DJ1D7IMe.css +0 -1
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
/**
 * RealTimeX SDK Client
 * Provides access to RealTimeX Desktop APIs for LLM, embeddings, and vector storage.
 *
 * Every response body is treated as an envelope of the form
 * `{ success, error?, ...payload }`: a falsy `success` (or a missing body)
 * is converted into a thrown Error carrying the server's `error` text when
 * there is one.
 */
export class RealTimeXSDK {
    client;
    appId;
    baseUrl;

    /**
     * @param baseUrl - Root URL of the RealTimeX API (defaults to the local desktop app)
     */
    constructor(baseUrl = 'http://localhost:3001') {
        this.baseUrl = baseUrl;
        // RTX_APP_ID is injected by RealTimeX when starting the app
        this.appId = process.env.RTX_APP_ID || '';
        this.client = axios.create({
            baseURL: this.baseUrl,
            headers: {
                'X-App-Id': this.appId,
                'Content-Type': 'application/json'
            },
            timeout: 30000 // 30 second timeout
        });
    }

    /**
     * Shared request pipeline used by every public method: send the request,
     * validate the response envelope, log failures and re-throw them.
     * @param verb - 'get' or 'post'
     * @param path - Endpoint path relative to the base URL
     * @param payload - JSON body for POST requests (ignored for GET)
     * @param label - Prefix used in the error log line
     * @param fallbackMessage - Error message used when the server supplies none
     * @returns The response body (`response.data`)
     * @throws The transport error unchanged, or an Error describing the failed envelope
     */
    async #send(verb, path, payload, label, fallbackMessage) {
        try {
            const response = verb === 'get'
                ? await this.client.get(path)
                : await this.client.post(path, payload);
            // Optional chaining: an empty or missing body must produce the
            // descriptive fallback error, not "cannot read properties of undefined".
            const body = response?.data;
            if (!body?.success) {
                throw new Error(body?.error || fallbackMessage);
            }
            return body;
        } catch (error) {
            // On HTTP error statuses axios rejects with a generic message; the
            // server's own explanation (if any) lives on error.response.data.
            const detail = error?.response?.data?.error ?? error?.message ?? error;
            console.error(`[RealTimeXSDK] ${label} error:`, detail);
            throw error;
        }
    }

    /**
     * Generate embeddings for text inputs
     * @param input - Single string or array of strings to embed
     * @param provider - Provider name (realtimexai, openai, gemini)
     * @param model - Model name (e.g., text-embedding-3-small)
     * @returns Array of embedding vectors (empty when the server returns none)
     */
    async generateEmbedding(input, provider = 'realtimexai', model = 'text-embedding-3-small') {
        const body = await this.#send('post', '/sdk/llm/embed', {
            input: Array.isArray(input) ? input : [input],
            provider,
            model
        }, 'Embedding', 'Embedding generation failed');
        // Same defaulting as queryVectors/listVectorWorkspaces so callers can index safely.
        return body.embeddings || [];
    }

    /**
     * Store vectors in RealTimeX managed vector storage
     * @param vectors - Array of vectors with metadata
     * @param workspaceId - Workspace/namespace for isolation
     */
    async upsertVectors(vectors, workspaceId = 'default') {
        await this.#send('post', '/sdk/llm/vectors/upsert', {
            vectors,
            workspaceId
        }, 'Vector upsert', 'Vector upsert failed');
    }

    /**
     * Query vectors for semantic similarity search
     * @param queryVector - Query embedding vector
     * @param topK - Number of results to return
     * @param workspaceId - Workspace/namespace to search in
     * @param filter - Optional metadata filter
     * @returns Array of similar vectors with scores
     */
    async queryVectors(queryVector, topK = 10, workspaceId = 'default', filter) {
        const body = await this.#send('post', '/sdk/llm/vectors/query', {
            vector: queryVector,
            topK,
            workspaceId,
            filter
        }, 'Vector query', 'Vector query failed');
        return body.results || [];
    }

    /**
     * Delete vectors from storage
     * @param workspaceId - Workspace to delete from
     * @param deleteAll - If true, deletes all vectors in workspace
     */
    async deleteVectors(workspaceId, deleteAll = false) {
        await this.#send('post', '/sdk/llm/vectors/delete', {
            workspaceId,
            deleteAll
        }, 'Vector delete', 'Vector deletion failed');
    }

    /**
     * List available LLM and embedding providers
     * @returns Provider configuration (the full response body)
     */
    async getProviders() {
        return this.#send('get', '/sdk/llm/providers', undefined, 'Get providers', 'Failed to fetch providers');
    }

    /**
     * List all vector storage workspaces
     * @returns Array of workspace IDs
     */
    async listVectorWorkspaces() {
        const body = await this.#send('get', '/sdk/llm/vectors/workspaces', undefined, 'List workspaces', 'Failed to list workspaces');
        return body.workspaces || [];
    }

    /**
     * Check if SDK is properly configured
     * @returns True if app ID is set
     */
    isConfigured() {
        return !!this.appId;
    }

    /**
     * Get current app ID
     * @returns App ID or empty string
     */
    getAppId() {
        return this.appId;
    }
}
|
|
162
|
+
// Export singleton instance
|
|
163
|
+
// Built at import time with the default base URL ('http://localhost:3001');
// RTX_APP_ID is read from the environment at that moment, so it must be set
// before this module is first imported.
export const realtimeXSDK = new RealTimeXSDK();
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import OpenAI from 'openai';
|
|
2
2
|
import { ProcessingEventService } from './ProcessingEventService.js';
|
|
3
3
|
import { RouterService } from './RouterService.js';
|
|
4
|
+
import { embeddingService } from './EmbeddingService.js';
|
|
5
|
+
import { deduplicationService } from './DeduplicationService.js';
|
|
4
6
|
export class AlchemistService {
|
|
5
7
|
processingEvents;
|
|
6
8
|
router;
|
|
@@ -86,7 +88,9 @@ export class AlchemistService {
|
|
|
86
88
|
}, supabase);
|
|
87
89
|
// 4. Save Signal
|
|
88
90
|
console.log('[AlchemistService] Saving signal to database...');
|
|
89
|
-
const { error: insertError } = await supabase
|
|
91
|
+
const { data: insertedSignal, error: insertError } = await supabase
|
|
92
|
+
.from('signals')
|
|
93
|
+
.insert([{
|
|
90
94
|
user_id: userId,
|
|
91
95
|
url: entry.url,
|
|
92
96
|
title: entry.title,
|
|
@@ -96,7 +100,9 @@ export class AlchemistService {
|
|
|
96
100
|
entities: response.entities,
|
|
97
101
|
tags: response.tags,
|
|
98
102
|
content: content
|
|
99
|
-
}])
|
|
103
|
+
}])
|
|
104
|
+
.select()
|
|
105
|
+
.single();
|
|
100
106
|
if (insertError) {
|
|
101
107
|
console.error('[AlchemistService] Insert error:', insertError);
|
|
102
108
|
stats.errors++;
|
|
@@ -104,6 +110,12 @@ export class AlchemistService {
|
|
|
104
110
|
else {
|
|
105
111
|
console.log('[AlchemistService] Signal saved successfully');
|
|
106
112
|
stats.signals++;
|
|
113
|
+
// 5. Generate Embedding & Check for Duplicates (non-blocking)
|
|
114
|
+
if (settings.embedding_model && await embeddingService.isAvailable()) {
|
|
115
|
+
this.processEmbedding(insertedSignal, settings, userId, supabase).catch((err) => {
|
|
116
|
+
console.error('[AlchemistService] Embedding processing failed:', err);
|
|
117
|
+
});
|
|
118
|
+
}
|
|
107
119
|
}
|
|
108
120
|
}
|
|
109
121
|
else {
|
|
@@ -238,4 +250,52 @@ export class AlchemistService {
|
|
|
238
250
|
return { score: 0, summary: 'Failed to parse', category: 'Error', entities: [], tags: [], relevant: false };
|
|
239
251
|
}
|
|
240
252
|
}
|
|
253
|
+
/**
|
|
254
|
+
* Process embedding generation and deduplication for a signal
|
|
255
|
+
* This runs asynchronously and doesn't block the main mining pipeline
|
|
256
|
+
*/
|
|
257
|
+
async processEmbedding(signal, settings, userId, supabase) {
|
|
258
|
+
try {
|
|
259
|
+
console.log('[AlchemistService] Generating embedding for signal:', signal.id);
|
|
260
|
+
// Generate embedding
|
|
261
|
+
const text = `${signal.title} ${signal.summary}`;
|
|
262
|
+
const embedding = await embeddingService.generateEmbedding(text, settings);
|
|
263
|
+
if (!embedding) {
|
|
264
|
+
console.warn('[AlchemistService] Embedding generation returned null, skipping');
|
|
265
|
+
return;
|
|
266
|
+
}
|
|
267
|
+
// Check for duplicates
|
|
268
|
+
const dedupeResult = await deduplicationService.checkAndMergeDuplicate(signal, embedding, userId, supabase);
|
|
269
|
+
if (dedupeResult.isDuplicate) {
|
|
270
|
+
console.log(`[AlchemistService] Signal is duplicate, merged into: ${dedupeResult.mergedSignalId}`);
|
|
271
|
+
// Delete the newly inserted signal since it's a duplicate
|
|
272
|
+
await supabase
|
|
273
|
+
.from('signals')
|
|
274
|
+
.delete()
|
|
275
|
+
.eq('id', signal.id);
|
|
276
|
+
return;
|
|
277
|
+
}
|
|
278
|
+
// Store embedding in RealTimeX vector storage
|
|
279
|
+
await embeddingService.storeSignalEmbedding(signal.id, embedding, {
|
|
280
|
+
title: signal.title,
|
|
281
|
+
summary: signal.summary,
|
|
282
|
+
url: signal.url,
|
|
283
|
+
category: signal.category,
|
|
284
|
+
userId
|
|
285
|
+
});
|
|
286
|
+
// Update signal metadata
|
|
287
|
+
await supabase
|
|
288
|
+
.from('signals')
|
|
289
|
+
.update({
|
|
290
|
+
has_embedding: true,
|
|
291
|
+
embedding_model: settings.embedding_model
|
|
292
|
+
})
|
|
293
|
+
.eq('id', signal.id);
|
|
294
|
+
console.log('[AlchemistService] Embedding processed successfully for signal:', signal.id);
|
|
295
|
+
}
|
|
296
|
+
catch (error) {
|
|
297
|
+
console.error('[AlchemistService] Embedding processing error:', error.message);
|
|
298
|
+
// Don't throw - we don't want to fail the entire mining process
|
|
299
|
+
}
|
|
300
|
+
}
|
|
241
301
|
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import { embeddingService } from './EmbeddingService.js';
|
|
2
|
+
/**
 * Deduplication Service
 * Handles smart signal merging based on semantic similarity
 */
export class DeduplicationService {
    SIMILARITY_THRESHOLD = 0.85;

    /**
     * Check if signal is a duplicate and merge if necessary
     * @param signal - New signal to check
     * @param embedding - Signal embedding vector
     * @param userId - User ID
     * @param supabase - Supabase client
     * @returns Deduplication result: `{ isDuplicate: false }`, or
     *          `{ isDuplicate: true, mergedSignalId, similarityScore }` once the
     *          existing signal has actually been updated
     */
    async checkAndMergeDuplicate(signal, embedding, userId, supabase) {
        try {
            // Find similar signals (top 5 matches)
            const similar = await embeddingService.findSimilarSignals(embedding, userId, this.SIMILARITY_THRESHOLD, 5);
            // Never treat the signal's own stored vector as a duplicate of itself:
            // merging into itself would end with the caller deleting the only copy.
            const candidates = Array.isArray(similar)
                ? similar.filter((candidate) => candidate.id !== signal.id)
                : [];
            if (candidates.length === 0) {
                return { isDuplicate: false };
            }
            // Get the most similar signal explicitly rather than relying on result order
            const bestMatch = candidates.reduce(
                (best, candidate) => (candidate.score > best.score ? candidate : best),
                candidates[0]
            );
            console.log(`[Deduplication] Found similar signal: ${bestMatch.id} (score: ${bestMatch.score})`);
            // Merge signals
            const merged = await this.#applyMerge(bestMatch.id, signal, userId, supabase);
            if (!merged) {
                // AlchemistService.processEmbedding deletes the new signal when
                // isDuplicate is true, so only report a duplicate when the merge
                // was really written; otherwise the data would be lost.
                return { isDuplicate: false };
            }
            return {
                isDuplicate: true,
                mergedSignalId: bestMatch.id,
                similarityScore: bestMatch.score
            };
        } catch (error) {
            console.error('[Deduplication] Error:', error?.message ?? error);
            return { isDuplicate: false };
        }
    }

    /**
     * Merge new signal into existing signal
     * @param existingSignalId - ID of existing signal
     * @param newSignal - New signal to merge
     * @param userId - User ID
     * @param supabase - Supabase client
     * @returns Merged signal ID (always `existingSignalId`; failures are logged)
     */
    async mergeSignals(existingSignalId, newSignal, userId, supabase) {
        await this.#applyMerge(existingSignalId, newSignal, userId, supabase);
        return existingSignalId;
    }

    /**
     * Fetch the existing signal, fold the new one into it and persist the result.
     * @returns True when the existing row was updated, false when the fetch or
     *          the update failed
     */
    async #applyMerge(existingSignalId, newSignal, userId, supabase) {
        // Fetch existing signal
        const { data: existing, error } = await supabase
            .from('signals')
            .select('*')
            .eq('id', existingSignalId)
            .eq('user_id', userId)
            .single();
        if (error || !existing) {
            console.error('[Deduplication] Failed to fetch existing signal:', error);
            return false;
        }
        // Calculate boosted score: each merge adds mentionCount * 0.1 points,
        // at most 0.5 points per merge, and the total is capped at 10.
        const mentionCount = (existing.mention_count || 1) + 1;
        const scoreBoost = Math.min(mentionCount * 0.1, 0.5);
        const newScore = Math.min((Number(existing.score) || 0) + scoreBoost, 10);
        // Combine summaries (use longer one)
        const combinedSummary = this.combineSummaries(existing.summary, newSignal.summary);
        // Track source URLs in metadata, without duplicates or empty entries
        const knownUrls = Array.isArray(existing.metadata?.source_urls)
            ? existing.metadata.source_urls
            : [existing.url];
        const sourceUrls = [...new Set([...knownUrls, newSignal.url])].filter(Boolean);
        const now = new Date().toISOString();
        // Update existing signal, scoped to the same user as the fetch above
        const { error: updateError } = await supabase
            .from('signals')
            .update({
                score: newScore,
                summary: combinedSummary,
                mention_count: mentionCount,
                metadata: {
                    ...existing.metadata,
                    source_urls: sourceUrls,
                    last_seen: now,
                    duplicate_count: mentionCount - 1
                },
                updated_at: now
            })
            .eq('id', existingSignalId)
            .eq('user_id', userId);
        if (updateError) {
            console.error('[Deduplication] Failed to update signal:', updateError);
            return false;
        }
        console.log(`[Deduplication] Merged signal ${newSignal.id} into ${existingSignalId} (mentions: ${mentionCount}, score: ${newScore})`);
        return true;
    }

    /**
     * Combine two summaries intelligently
     * @param existing - Existing summary
     * @param newSummary - New summary
     * @returns Combined summary (currently the longer of the two; a missing
     *          summary never wins over a present one)
     */
    combineSummaries(existing, newSummary) {
        // Simple strategy: use the longer summary
        // TODO: In future, use LLM to intelligently merge summaries
        if (newSummary == null) {
            return existing;
        }
        if (existing == null) {
            return newSummary;
        }
        return existing.length >= newSummary.length ? existing : newSummary;
    }

    /**
     * Get deduplication statistics for a user
     * @param userId - User ID
     * @param supabase - Supabase client
     * @returns Statistics object `{ totalSignals, mergedSignals, deduplicationRate }`
     */
    async getStats(userId, supabase) {
        try {
            // Count total signals
            const { count: totalSignals, error: totalError } = await supabase
                .from('signals')
                .select('*', { count: 'exact', head: true })
                .eq('user_id', userId);
            if (totalError) {
                console.error('[Deduplication] Failed to count signals:', totalError);
            }
            // Count merged signals (mention_count > 1)
            const { count: mergedSignals, error: mergedError } = await supabase
                .from('signals')
                .select('*', { count: 'exact', head: true })
                .eq('user_id', userId)
                .gt('mention_count', 1);
            if (mergedError) {
                console.error('[Deduplication] Failed to count merged signals:', mergedError);
            }
            const deduplicationRate = totalSignals
                ? (mergedSignals || 0) / totalSignals
                : 0;
            return {
                totalSignals: totalSignals || 0,
                mergedSignals: mergedSignals || 0,
                deduplicationRate
            };
        } catch (error) {
            console.error('[Deduplication] Failed to get stats:', error?.message ?? error);
            return {
                totalSignals: 0,
                mergedSignals: 0,
                deduplicationRate: 0
            };
        }
    }
}
|
|
145
|
+
// Export singleton instance
|
|
146
|
+
// Shared instance (imported by AlchemistService). The class holds no mutable
// state of its own, only the SIMILARITY_THRESHOLD constant; the Supabase
// client and user ID are passed in on every call.
export const deduplicationService = new DeduplicationService();
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
 * Embedding Service using RealTimeX SDK
 * Provides simplified interface for embedding generation and vector operations
 * Gracefully degrades if SDK is not available
 */
export class EmbeddingService {
    WORKSPACE_ID = 'alchemy-signals';
    SIMILARITY_THRESHOLD = 0.85;
    sdk = null;
    sdkLoadAttempted = false;
    // Promise of the single SDK import; lets callers that arrive while the
    // import is still pending wait for it instead of seeing `sdk === null`.
    #sdkLoading = null;

    /**
     * Lazy load SDK - only loads when first needed.
     * The import is attempted once; concurrent callers share that attempt.
     * @returns The SDK instance, or null when it could not be loaded
     */
    async loadSDK() {
        if (!this.sdkLoadAttempted) {
            this.sdkLoadAttempted = true;
            this.#sdkLoading = this.#importSDK();
        }
        // No-op once settled (or when sdk/sdkLoadAttempted were preset by hand).
        await this.#sdkLoading;
        return this.sdk;
    }

    /**
     * Import the SDK module and cache its singleton on `this.sdk`.
     * Never rejects: a failed import leaves `this.sdk` as null.
     */
    async #importSDK() {
        try {
            const sdkModule = await import('../lib/realtimex-sdk.js');
            this.sdk = sdkModule.realtimeXSDK ?? null;
            console.log('[EmbeddingService] RealTimeX SDK loaded successfully');
        } catch (error) {
            console.warn('[EmbeddingService] RealTimeX SDK not available - embedding features disabled', error?.message ?? error);
            this.sdk = null;
        }
    }

    /**
     * Generate embedding for a single text
     * @param text - Text to embed
     * @param settings - Alchemy settings with embedding configuration
     * @returns Embedding vector or null if failed
     */
    async generateEmbedding(text, settings) {
        try {
            // Nothing meaningful to embed: skip the API round trip.
            if (typeof text !== 'string' || text.trim() === '') {
                console.warn('[EmbeddingService] No text to embed');
                return null;
            }
            const sdk = await this.loadSDK();
            if (!sdk || !sdk.isConfigured()) {
                console.warn('[EmbeddingService] RealTimeX SDK not available');
                return null;
            }
            const provider = this.getProvider(settings);
            const model = settings?.embedding_model || 'text-embedding-3-small';
            const embeddings = await sdk.generateEmbedding(text, provider, model);
            return embeddings?.[0] ?? null;
        } catch (error) {
            console.error('[EmbeddingService] Generation failed:', error?.message ?? error);
            return null;
        }
    }

    /**
     * Generate embeddings for multiple texts (batch)
     * @param texts - Array of texts to embed
     * @param settings - Alchemy settings
     * @returns Array of embedding vectors or null if failed
     */
    async generateEmbeddings(texts, settings) {
        try {
            const sdk = await this.loadSDK();
            if (!sdk || !sdk.isConfigured()) {
                console.warn('[EmbeddingService] RealTimeX SDK not configured');
                return null;
            }
            // An empty batch has an empty result; no request needed.
            if (Array.isArray(texts) && texts.length === 0) {
                return [];
            }
            const provider = this.getProvider(settings);
            const model = settings?.embedding_model || 'text-embedding-3-small';
            return await sdk.generateEmbedding(texts, provider, model);
        } catch (error) {
            console.error('[EmbeddingService] Batch generation failed:', error?.message ?? error);
            return null;
        }
    }

    /**
     * Store signal embedding in RealTimeX vector storage
     * @param signalId - Unique signal ID
     * @param embedding - Embedding vector
     * @param metadata - Signal metadata
     * @throws When the SDK is unavailable or the upsert fails
     */
    async storeSignalEmbedding(signalId, embedding, metadata) {
        try {
            const sdk = await this.loadSDK();
            if (!sdk) {
                throw new Error('SDK not available');
            }
            await sdk.upsertVectors([{
                    id: signalId,
                    vector: embedding,
                    metadata
                }], this.WORKSPACE_ID);
            console.log('[EmbeddingService] Stored embedding for signal:', signalId);
        } catch (error) {
            console.error('[EmbeddingService] Storage failed:', error?.message ?? error);
            throw error;
        }
    }

    /**
     * Find similar signals using semantic search
     * @param queryEmbedding - Query embedding vector
     * @param userId - User ID for filtering
     * @param threshold - Similarity threshold (0-1)
     * @param limit - Max results
     * @returns Array of similar signals (empty when the SDK is unavailable or the search fails)
     */
    async findSimilarSignals(queryEmbedding, userId, threshold = this.SIMILARITY_THRESHOLD, limit = 10) {
        try {
            const sdk = await this.loadSDK();
            if (!sdk) {
                return [];
            }
            // Filter by user on the storage side
            const results = await sdk.queryVectors(queryEmbedding, limit, this.WORKSPACE_ID, { userId });
            // Filter by similarity threshold
            return results.filter((match) => match.score >= threshold);
        } catch (error) {
            console.error('[EmbeddingService] Similarity search failed:', error?.message ?? error);
            return [];
        }
    }

    /**
     * Delete all embeddings for a user
     * Currently a stub: it only logs a warning and deletes nothing.
     * @param userId - User ID
     */
    async deleteUserEmbeddings(userId) {
        // Note: Current SDK only supports deleteAll
        // In future, we may need user-specific workspaces
        console.warn('[EmbeddingService] User-specific deletion not yet supported');
    }

    /**
     * Determine provider from settings
     * @param settings - Alchemy settings (reads `embedding_base_url`)
     * @returns Provider name: 'openai', 'gemini' or 'realtimexai' (default)
     */
    getProvider(settings) {
        const baseUrl = settings?.embedding_base_url;
        // If embedding_base_url is not set, use realtimexai (default)
        if (!baseUrl) {
            return 'realtimexai';
        }
        // Detect provider from base URL
        const url = String(baseUrl).toLowerCase();
        if (url.includes('openai')) {
            return 'openai';
        }
        if (url.includes('google') || url.includes('gemini')) {
            return 'gemini';
        }
        // Default to realtimexai
        return 'realtimexai';
    }

    /**
     * Check if embedding service is available
     * @returns True if SDK is configured and available
     */
    async isAvailable() {
        const sdk = await this.loadSDK();
        return sdk != null && sdk.isConfigured();
    }
}
|
|
168
|
+
// Export singleton instance
|
|
169
|
+
// Shared instance. The lazily loaded SDK handle (`sdk`, `sdkLoadAttempted`)
// is cached on it, so every importer shares a single SDK load attempt.
export const embeddingService = new EmbeddingService();
|