catalist-support-agent 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/admin-portal.d.ts +43 -0
- package/dist/admin-portal.d.ts.map +1 -0
- package/dist/admin-portal.js +166 -0
- package/dist/admin-portal.js.map +1 -0
- package/dist/analysis/entities.d.ts +73 -0
- package/dist/analysis/entities.d.ts.map +1 -0
- package/dist/analysis/entities.js +378 -0
- package/dist/analysis/entities.js.map +1 -0
- package/dist/analysis/index.d.ts +44 -0
- package/dist/analysis/index.d.ts.map +1 -0
- package/dist/analysis/index.js +243 -0
- package/dist/analysis/index.js.map +1 -0
- package/dist/analysis/intent.d.ts +49 -0
- package/dist/analysis/intent.d.ts.map +1 -0
- package/dist/analysis/intent.js +320 -0
- package/dist/analysis/intent.js.map +1 -0
- package/dist/analysis/sentiment.d.ts +57 -0
- package/dist/analysis/sentiment.d.ts.map +1 -0
- package/dist/analysis/sentiment.js +351 -0
- package/dist/analysis/sentiment.js.map +1 -0
- package/dist/brand/compliance.d.ts +122 -0
- package/dist/brand/compliance.d.ts.map +1 -0
- package/dist/brand/compliance.js +378 -0
- package/dist/brand/compliance.js.map +1 -0
- package/dist/brand/forbidden-terms.d.ts +99 -0
- package/dist/brand/forbidden-terms.d.ts.map +1 -0
- package/dist/brand/forbidden-terms.js +265 -0
- package/dist/brand/forbidden-terms.js.map +1 -0
- package/dist/brand/index.d.ts +10 -0
- package/dist/brand/index.d.ts.map +1 -0
- package/dist/brand/index.js +12 -0
- package/dist/brand/index.js.map +1 -0
- package/dist/config.d.ts +325 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +492 -0
- package/dist/config.js.map +1 -0
- package/dist/delivery/index.d.ts +84 -0
- package/dist/delivery/index.d.ts.map +1 -0
- package/dist/delivery/index.js +435 -0
- package/dist/delivery/index.js.map +1 -0
- package/dist/embeddings/cache.d.ts +96 -0
- package/dist/embeddings/cache.d.ts.map +1 -0
- package/dist/embeddings/cache.js +193 -0
- package/dist/embeddings/cache.js.map +1 -0
- package/dist/embeddings/index.d.ts +152 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +337 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/openai-client.d.ts +67 -0
- package/dist/embeddings/openai-client.d.ts.map +1 -0
- package/dist/embeddings/openai-client.js +190 -0
- package/dist/embeddings/openai-client.js.map +1 -0
- package/dist/errors.d.ts +302 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +508 -0
- package/dist/errors.js.map +1 -0
- package/dist/escalation/index.d.ts +93 -0
- package/dist/escalation/index.d.ts.map +1 -0
- package/dist/escalation/index.js +436 -0
- package/dist/escalation/index.js.map +1 -0
- package/dist/extraction/deduplication.d.ts +97 -0
- package/dist/extraction/deduplication.d.ts.map +1 -0
- package/dist/extraction/deduplication.js +271 -0
- package/dist/extraction/deduplication.js.map +1 -0
- package/dist/extraction/gmail-extractor.d.ts +160 -0
- package/dist/extraction/gmail-extractor.d.ts.map +1 -0
- package/dist/extraction/gmail-extractor.js +396 -0
- package/dist/extraction/gmail-extractor.js.map +1 -0
- package/dist/extraction/gmail-token-manager.d.ts +36 -0
- package/dist/extraction/gmail-token-manager.d.ts.map +1 -0
- package/dist/extraction/gmail-token-manager.js +146 -0
- package/dist/extraction/gmail-token-manager.js.map +1 -0
- package/dist/extraction/index.d.ts +13 -0
- package/dist/extraction/index.d.ts.map +1 -0
- package/dist/extraction/index.js +20 -0
- package/dist/extraction/index.js.map +1 -0
- package/dist/extraction/pii-handler.d.ts +100 -0
- package/dist/extraction/pii-handler.d.ts.map +1 -0
- package/dist/extraction/pii-handler.js +295 -0
- package/dist/extraction/pii-handler.js.map +1 -0
- package/dist/extraction/pipeline.d.ts +94 -0
- package/dist/extraction/pipeline.d.ts.map +1 -0
- package/dist/extraction/pipeline.js +380 -0
- package/dist/extraction/pipeline.js.map +1 -0
- package/dist/extraction/quality-filter.d.ts +99 -0
- package/dist/extraction/quality-filter.d.ts.map +1 -0
- package/dist/extraction/quality-filter.js +370 -0
- package/dist/extraction/quality-filter.js.map +1 -0
- package/dist/extraction/rate-limiter.d.ts +90 -0
- package/dist/extraction/rate-limiter.d.ts.map +1 -0
- package/dist/extraction/rate-limiter.js +242 -0
- package/dist/extraction/rate-limiter.js.map +1 -0
- package/dist/extraction/state-manager.d.ts +126 -0
- package/dist/extraction/state-manager.d.ts.map +1 -0
- package/dist/extraction/state-manager.js +344 -0
- package/dist/extraction/state-manager.js.map +1 -0
- package/dist/generation/index.d.ts +75 -0
- package/dist/generation/index.d.ts.map +1 -0
- package/dist/generation/index.js +641 -0
- package/dist/generation/index.js.map +1 -0
- package/dist/index.d.ts +96 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +233 -0
- package/dist/index.js.map +1 -0
- package/dist/intake/index.d.ts +15 -0
- package/dist/intake/index.d.ts.map +1 -0
- package/dist/intake/index.js +19 -0
- package/dist/intake/index.js.map +1 -0
- package/dist/intake/normalizer.d.ts +163 -0
- package/dist/intake/normalizer.d.ts.map +1 -0
- package/dist/intake/normalizer.js +309 -0
- package/dist/intake/normalizer.js.map +1 -0
- package/dist/intake/postmark.d.ts +72 -0
- package/dist/intake/postmark.d.ts.map +1 -0
- package/dist/intake/postmark.js +276 -0
- package/dist/intake/postmark.js.map +1 -0
- package/dist/intake/slack.d.ts +106 -0
- package/dist/intake/slack.d.ts.map +1 -0
- package/dist/intake/slack.js +378 -0
- package/dist/intake/slack.js.map +1 -0
- package/dist/intake/twilio.d.ts +86 -0
- package/dist/intake/twilio.d.ts.map +1 -0
- package/dist/intake/twilio.js +283 -0
- package/dist/intake/twilio.js.map +1 -0
- package/dist/knowledge/index.d.ts +100 -0
- package/dist/knowledge/index.d.ts.map +1 -0
- package/dist/knowledge/index.js +516 -0
- package/dist/knowledge/index.js.map +1 -0
- package/dist/knowledge/invoice-resolver.d.ts +62 -0
- package/dist/knowledge/invoice-resolver.d.ts.map +1 -0
- package/dist/knowledge/invoice-resolver.js +267 -0
- package/dist/knowledge/invoice-resolver.js.map +1 -0
- package/dist/types.d.ts +535 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +48 -0
- package/dist/types.js.map +1 -0
- package/ga-service-account.json +13 -0
- package/gmail-knowledge-migration.sql +149 -0
- package/nul +1 -0
- package/package.json +55 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LRU Cache for Embedding Queries
|
|
3
|
+
*
|
|
4
|
+
* In-memory cache with TTL support to reduce redundant embedding API calls.
|
|
5
|
+
* Stores query text -> embedding vector mappings.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Simple LRU Cache with TTL support
|
|
9
|
+
*/
|
|
10
|
+
export class LRUCache {
    /** Backing store. A Map iterates in insertion order, so its first key is the least recently used one. */
    cache;
    /** Maximum number of entries kept before set() evicts the least recently used entry. */
    maxSize;
    /** Default time-to-live in milliseconds, applied when set() receives no custom TTL. */
    ttlMs;
    /**
     * @param {number} [maxSize=1000] - Entry limit before eviction starts.
     * @param {number} [ttlMs=300000] - Default entry lifetime in milliseconds.
     */
    constructor(maxSize = 1000, ttlMs = 300000) {
        this.cache = new Map();
        this.maxSize = maxSize;
        this.ttlMs = ttlMs;
    }
    /**
     * Get a value from the cache and mark it as most recently used.
     * An expired entry is removed and reported as missing.
     * @param {*} key - Cache key.
     * @returns {*} The stored value, or undefined when absent or expired.
     */
    get(key) {
        const entry = this.cache.get(key);
        if (!entry) {
            return undefined;
        }
        if (Date.now() > entry.expiresAt) {
            this.cache.delete(key);
            return undefined;
        }
        // Delete + re-insert moves the key to the end of the Map's iteration order.
        this.cache.delete(key);
        this.cache.set(key, entry);
        return entry.value;
    }
    /**
     * Set a value in the cache, evicting the least recently used entry when full.
     * @param {*} key - Cache key.
     * @param {*} value - Value to store.
     * @param {number} [customTtlMs] - Lifetime for this entry; defaults to the cache-wide TTL.
     */
    set(key, value, customTtlMs) {
        // Remove any existing entry for this key first. Overwriting does not grow the
        // cache, so it must not evict an unrelated entry, and Map.set() on an existing
        // key would otherwise leave it at its old (stale) position in the LRU order.
        this.cache.delete(key);
        if (this.cache.size >= this.maxSize) {
            // Use the iterator's `done` flag rather than comparing the key to undefined,
            // so that a literal `undefined` key can be evicted too.
            const oldest = this.cache.keys().next();
            if (!oldest.done) {
                this.cache.delete(oldest.value);
            }
        }
        const ttl = customTtlMs ?? this.ttlMs;
        this.cache.set(key, {
            value,
            expiresAt: Date.now() + ttl,
        });
    }
    /**
     * Check if a key exists and is not expired. Does not change the LRU order;
     * an expired entry is removed as a side effect.
     * @param {*} key - Cache key.
     * @returns {boolean} True when a live entry exists.
     */
    has(key) {
        const entry = this.cache.get(key);
        if (!entry) {
            return false;
        }
        if (Date.now() > entry.expiresAt) {
            this.cache.delete(key);
            return false;
        }
        return true;
    }
    /**
     * Delete a key from the cache.
     * @param {*} key - Cache key.
     * @returns {boolean} True when an entry was removed.
     */
    delete(key) {
        return this.cache.delete(key);
    }
    /**
     * Clear all entries.
     */
    clear() {
        this.cache.clear();
    }
    /**
     * Current number of stored entries, including expired ones not yet removed.
     */
    get size() {
        return this.cache.size;
    }
    /**
     * Remove every expired entry.
     * @returns {number} How many entries were removed.
     */
    prune() {
        const now = Date.now();
        let pruned = 0;
        // Deleting the current key while iterating a Map is safe in JavaScript.
        for (const [key, entry] of this.cache) {
            if (now > entry.expiresAt) {
                this.cache.delete(key);
                pruned++;
            }
        }
        return pruned;
    }
    /**
     * Get cache statistics.
     * @returns {{size: number, maxSize: number, ttlMs: number}}
     */
    getStats() {
        return {
            size: this.cache.size,
            maxSize: this.maxSize,
            ttlMs: this.ttlMs,
        };
    }
}
|
|
112
|
+
/**
|
|
113
|
+
* Specialized cache for embedding vectors
|
|
114
|
+
* Keys are a 32-bit hash of the normalized text, so queries that differ only in letter case or whitespace share one entry
|
|
115
|
+
*/
|
|
116
|
+
export class EmbeddingCache {
    /** Underlying LRU/TTL cache, keyed by the hash produced by generateKey(). */
    cache;
    /** Number of get() calls that found a cached embedding. */
    hits = 0;
    /** Number of get() calls that found nothing. */
    misses = 0;
    /**
     * @param {number} [maxSize=1000] - Maximum number of cached embeddings.
     * @param {number} [ttlMs=300000] - Entry lifetime in milliseconds.
     */
    constructor(maxSize = 1000, ttlMs = 300000) {
        this.cache = new LRUCache(maxSize, ttlMs);
    }
    /**
     * Generate a cache key from text.
     *
     * The text is normalized (lowercased, trimmed, whitespace runs collapsed to one
     * space) and hashed with 32-bit FNV-1a over its UTF-16 code units.
     * NOTE(review): a 32-bit hash can collide, in which case two different texts
     * would share one cached embedding; acceptable only for a small in-memory cache.
     *
     * @param {string} text - Text to derive the key from.
     * @returns {string} Lowercase hexadecimal hash.
     */
    generateKey(text) {
        const normalized = text.toLowerCase().trim().replace(/\s+/g, ' ');
        let hash = 2166136261; // FNV-1a 32-bit offset basis
        for (let i = 0; i < normalized.length; i++) {
            hash ^= normalized.charCodeAt(i);
            // Math.imul performs an exact 32-bit multiply. A plain `hash * 16777619`
            // can exceed 2^53 and silently round away the low bits of the hash.
            hash = Math.imul(hash, 16777619) >>> 0;
        }
        return hash.toString(16);
    }
    /**
     * Get an embedding from the cache, updating the hit/miss counters.
     * @param {string} text - Text whose embedding is wanted.
     * @returns {number[] | undefined} The cached embedding, or undefined on a miss.
     */
    get(text) {
        const key = this.generateKey(text);
        const result = this.cache.get(key);
        // Compare against undefined (the cache's miss value) instead of relying on truthiness.
        if (result !== undefined) {
            this.hits++;
        }
        else {
            this.misses++;
        }
        return result;
    }
    /**
     * Store an embedding in the cache.
     * @param {string} text - Text the embedding was generated for.
     * @param {number[]} embedding - Embedding vector to store.
     */
    set(text, embedding) {
        const key = this.generateKey(text);
        this.cache.set(key, embedding);
    }
    /**
     * Check if text has a cached, unexpired embedding. Does not affect the hit/miss counters.
     * @param {string} text - Text to look up.
     * @returns {boolean}
     */
    has(text) {
        const key = this.generateKey(text);
        return this.cache.has(key);
    }
    /**
     * Clear the cache and reset the hit/miss counters.
     */
    clear() {
        this.cache.clear();
        this.hits = 0;
        this.misses = 0;
    }
    /**
     * Get cache statistics.
     * @returns {{size: number, hits: number, misses: number, hitRate: number}}
     *   hitRate is hits / (hits + misses), or 0 before any lookup.
     */
    getStats() {
        const total = this.hits + this.misses;
        return {
            size: this.cache.size,
            hits: this.hits,
            misses: this.misses,
            hitRate: total > 0 ? this.hits / total : 0,
        };
    }
    /**
     * Prune expired entries.
     * @returns {number} Number of entries removed.
     */
    prune() {
        return this.cache.prune();
    }
}
|
|
193
|
+
//# sourceMappingURL=cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cache.js","sourceRoot":"","sources":["../../src/embeddings/cache.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAOH;;GAEG;AACH,MAAM,OAAO,QAAQ;IACX,KAAK,CAAwB;IACpB,OAAO,CAAS;IAChB,KAAK,CAAS;IAE/B,YAAY,UAAkB,IAAI,EAAE,QAAgB,MAAM;QACxD,IAAI,CAAC,KAAK,GAAG,IAAI,GAAG,EAAE,CAAC;QACvB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,GAAM;QACR,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAElC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,mBAAmB;QACnB,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;YACjC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACvB,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,oDAAoD;QACpD,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACvB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAE3B,OAAO,KAAK,CAAC,KAAK,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,GAAM,EAAE,KAAQ,EAAE,WAAoB;QACxC,0DAA0D;QAC1D,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACpC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;YAChD,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;gBAC3B,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,MAAM,GAAG,GAAG,WAAW,IAAI,IAAI,CAAC,KAAK,CAAC;QACtC,MAAM,KAAK,GAAkB;YAC3B,KAAK;YACL,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,GAAG;SAC5B,CAAC;QAEF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAC7B,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,GAAM;QACR,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAClC,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,OAAO,KAAK,CAAC;QACf,CAAC;QACD,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;YACjC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YACvB,OAAO,KAAK,CAAC;QACf,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,GAAM;QACX,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IACrB,CAAC;IAED;;OAEG;IACH,IAAI,IAAI;QACN,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,KAAK;QACH,MAAM,GAAG,GAAG
,IAAI,CAAC,GAAG,EAAE,CAAC;QACvB,IAAI,MAAM,GAAG,CAAC,CAAC;QAEf,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACtC,IAAI,GAAG,GAAG,KAAK,CAAC,SAAS,EAAE,CAAC;gBAC1B,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACvB,MAAM,EAAE,CAAC;YACX,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACH,QAAQ;QAKN,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI;YACrB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,KAAK,EAAE,IAAI,CAAC,KAAK;SAClB,CAAC;IACJ,CAAC;CACF;AAED;;;GAGG;AACH,MAAM,OAAO,cAAc;IACjB,KAAK,CAA6B;IAClC,IAAI,GAAW,CAAC,CAAC;IACjB,MAAM,GAAW,CAAC,CAAC;IAE3B,YAAY,UAAkB,IAAI,EAAE,QAAgB,MAAM;QACxD,IAAI,CAAC,KAAK,GAAG,IAAI,QAAQ,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;IAC5C,CAAC;IAED;;;OAGG;IACK,WAAW,CAAC,IAAY;QAC9B,uDAAuD;QACvD,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAElE,+BAA+B;QAC/B,IAAI,IAAI,GAAG,UAAU,CAAC;QACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC3C,IAAI,IAAI,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YACjC,IAAI,GAAG,CAAC,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;QACjC,CAAC;QAED,OAAO,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,IAAY;QACd,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEnC,IAAI,MAAM,EAAE,CAAC;YACX,IAAI,CAAC,IAAI,EAAE,CAAC;QACd,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,IAAY,EAAE,SAAmB;QACnC,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QACnC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;IACjC,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,IAAY;QACd,MAAM,GAAG,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QACnC,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;QACnB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;QACd,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,QAAQ;QAMN,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC;QACtC,OAAO;YACL,IAAI,EAAE,IAAI,CAAC
,KAAK,CAAC,IAAI;YACrB,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,OAAO,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;SAC3C,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;IAC5B,CAAC;CACF"}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Service Module
|
|
3
|
+
*
|
|
4
|
+
* Provides semantic embedding generation for the Gmail Knowledge Base.
|
|
5
|
+
* Uses the OpenAI text-embedding-3-small model with 1536-dimension embeddings by default (both are configurable).
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Single text and batch embedding generation
|
|
9
|
+
* - LRU cache with TTL for query embeddings
|
|
10
|
+
* - Embeddable text formatting for knowledge entries
|
|
11
|
+
* - Integration with Supabase for storing embeddings
|
|
12
|
+
*/
|
|
13
|
+
/** Options accepted by the EmbeddingService constructor. */
export interface EmbeddingServiceConfig {
|
|
14
|
+
/** OpenAI API key; handed to the embedding client as `apiKey`. */
openaiApiKey: string;
|
|
15
|
+
/** Embedding model name; the service falls back to 'text-embedding-3-small' when unset or empty. */
embeddingModel?: string;
|
|
16
|
+
/** Embedding vector size; the service falls back to 1536 when unset or 0. */
embeddingDimensions?: number;
|
|
17
|
+
/** Maximum number of cached embeddings; the service falls back to 1000 when unset or 0. */
cacheMaxSize?: number;
|
|
18
|
+
/** Cache entry lifetime in milliseconds; the service falls back to 300000 (5 minutes) when unset or 0. */
cacheTtlMs?: number;
|
|
19
|
+
/** Supabase project URL; first argument to createClient(). */
supabaseUrl: string;
|
|
20
|
+
/** Supabase service-role key; second argument to createClient(). */
supabaseServiceRoleKey: string;
|
|
21
|
+
}
|
|
22
|
+
/** Fields of a knowledge entry that formatKnowledgeEntry() renders into embeddable text. */
export interface KnowledgeEmbeddingInput {
|
|
23
|
+
/** Optional subject; when present it is prefixed to the question as "{subject} - {text}". */
questionSubject?: string;
|
|
24
|
+
/** Question body, rendered on the "[QUESTION]:" line. */
questionText: string;
|
|
25
|
+
/** Response body, rendered on the "[RESPONSE]:" line. */
responseText: string;
|
|
26
|
+
/** Optional resolution; when present it is rendered on an "[OUTCOME]:" line. */
resolutionOutcome?: string;
|
|
27
|
+
}
|
|
28
|
+
/** Statistics reported by EmbeddingService.getStats(). */
export interface EmbeddingStats {
|
|
29
|
+
/** Query-cache counters; same shape as EmbeddingCache.getStats() — presumably sourced from it, confirm in getStats(). */
cache: {
|
|
30
|
+
/** Number of entries currently stored. */
size: number;
|
|
31
|
+
/** Lookups that found a cached embedding. */
hits: number;
|
|
32
|
+
/** Lookups that found nothing. */
misses: number;
|
|
33
|
+
/** hits / (hits + misses), or 0 before any lookup (per EmbeddingCache.getStats()). */
hitRate: number;
|
|
34
|
+
};
|
|
35
|
+
/** Count of embeddings obtained from the API client by embed() and embedBatch(); cache hits are not counted. */
totalEmbeddingsGenerated: number;
|
|
36
|
+
/** Sum of `usage.totalTokens` reported by the API client across those calls. */
totalTokensUsed: number;
|
|
37
|
+
}
|
|
38
|
+
export declare class EmbeddingService {
|
|
39
|
+
private client;
|
|
40
|
+
private cache;
|
|
41
|
+
private supabase;
|
|
42
|
+
private totalEmbeddingsGenerated;
|
|
43
|
+
private totalTokensUsed;
|
|
44
|
+
constructor(config: EmbeddingServiceConfig);
|
|
45
|
+
/**
|
|
46
|
+
* Generate embedding for a single text, using cache when available
|
|
47
|
+
*/
|
|
48
|
+
embed(text: string): Promise<number[]>;
|
|
49
|
+
/**
|
|
50
|
+
* Generate embeddings for multiple texts in batch
|
|
51
|
+
* Does not use cache (typically for bulk operations)
|
|
52
|
+
*/
|
|
53
|
+
embedBatch(texts: string[]): Promise<number[][]>;
|
|
54
|
+
/**
|
|
55
|
+
* Generate embedding for a query (uses cache, intended for similarity search)
|
|
56
|
+
*/
|
|
57
|
+
embedQuery(queryText: string): Promise<number[]>;
|
|
58
|
+
/**
|
|
59
|
+
* Format a knowledge entry into embeddable text
|
|
60
|
+
*
|
|
61
|
+
* Structure:
|
|
62
|
+
* [QUESTION]: {subject} - {text}
|
|
63
|
+
* [RESPONSE]: {text}
|
|
64
|
+
* [OUTCOME]: {resolution}
|
|
65
|
+
*/
|
|
66
|
+
formatKnowledgeEntry(input: KnowledgeEmbeddingInput): string;
|
|
67
|
+
/**
|
|
68
|
+
* Generate embedding for a knowledge entry
|
|
69
|
+
*/
|
|
70
|
+
embedKnowledgeEntry(input: KnowledgeEmbeddingInput): Promise<number[]>;
|
|
71
|
+
/**
|
|
72
|
+
* Generate embeddings for multiple knowledge entries
|
|
73
|
+
*/
|
|
74
|
+
embedKnowledgeEntries(inputs: KnowledgeEmbeddingInput[]): Promise<number[][]>;
|
|
75
|
+
/**
|
|
76
|
+
* Update embedding for a Gmail knowledge entry
|
|
77
|
+
*/
|
|
78
|
+
updateKnowledgeEntryEmbedding(entryId: string, embedding: number[]): Promise<void>;
|
|
79
|
+
/**
|
|
80
|
+
* Generate and store embedding for a Gmail knowledge entry
|
|
81
|
+
*/
|
|
82
|
+
generateAndStoreEmbedding(entryId: string, input: KnowledgeEmbeddingInput): Promise<number[]>;
|
|
83
|
+
/**
|
|
84
|
+
* Batch generate and store embeddings for multiple entries
|
|
85
|
+
*/
|
|
86
|
+
generateAndStoreBatchEmbeddings(entries: Array<{
|
|
87
|
+
id: string;
|
|
88
|
+
input: KnowledgeEmbeddingInput;
|
|
89
|
+
}>): Promise<void>;
|
|
90
|
+
/**
|
|
91
|
+
* Get entries that need embeddings
|
|
92
|
+
*/
|
|
93
|
+
getEntriesNeedingEmbeddings(limit?: number): Promise<Array<{
|
|
94
|
+
id: string;
|
|
95
|
+
questionSubject: string | null;
|
|
96
|
+
questionText: string;
|
|
97
|
+
responseText: string;
|
|
98
|
+
resolutionIndicator: string | null;
|
|
99
|
+
}>>;
|
|
100
|
+
/**
|
|
101
|
+
* Backfill embeddings for all entries without embeddings
|
|
102
|
+
*/
|
|
103
|
+
backfillEmbeddings(batchSize?: number, onProgress?: (processed: number, total: number) => void): Promise<{
|
|
104
|
+
processed: number;
|
|
105
|
+
errors: number;
|
|
106
|
+
}>;
|
|
107
|
+
/**
|
|
108
|
+
* Find similar knowledge entries using vector similarity
|
|
109
|
+
*/
|
|
110
|
+
findSimilar(queryText: string, options?: {
|
|
111
|
+
matchThreshold?: number;
|
|
112
|
+
matchCount?: number;
|
|
113
|
+
filterIntent?: string;
|
|
114
|
+
onlyApproved?: boolean;
|
|
115
|
+
}): Promise<Array<{
|
|
116
|
+
id: string;
|
|
117
|
+
questionSubject: string | null;
|
|
118
|
+
questionText: string;
|
|
119
|
+
responseText: string;
|
|
120
|
+
intentCategory: string | null;
|
|
121
|
+
qualityScore: number;
|
|
122
|
+
similarity: number;
|
|
123
|
+
emailReceivedAt: string;
|
|
124
|
+
resolutionIndicator: string | null;
|
|
125
|
+
}>>;
|
|
126
|
+
/**
|
|
127
|
+
* Get service statistics
|
|
128
|
+
*/
|
|
129
|
+
getStats(): EmbeddingStats;
|
|
130
|
+
/**
|
|
131
|
+
* Clear the embedding cache
|
|
132
|
+
*/
|
|
133
|
+
clearCache(): void;
|
|
134
|
+
/**
|
|
135
|
+
* Get embedding configuration
|
|
136
|
+
*/
|
|
137
|
+
getConfig(): {
|
|
138
|
+
model: string;
|
|
139
|
+
dimensions: number;
|
|
140
|
+
};
|
|
141
|
+
}
|
|
142
|
+
/**
|
|
143
|
+
* Get the singleton embedding service instance
|
|
144
|
+
*/
|
|
145
|
+
export declare function getEmbeddingService(): EmbeddingService;
|
|
146
|
+
/**
|
|
147
|
+
* Reset the singleton (for testing)
|
|
148
|
+
*/
|
|
149
|
+
export declare function resetEmbeddingService(): void;
|
|
150
|
+
export { EmbeddingCache } from './cache.js';
|
|
151
|
+
export { OpenAIEmbeddingClient, type EmbeddingResponse, type BatchEmbeddingResponse } from './openai-client.js';
|
|
152
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/embeddings/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAWH,MAAM,WAAW,sBAAsB;IACrC,YAAY,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,WAAW,EAAE,MAAM,CAAC;IACpB,sBAAsB,EAAE,MAAM,CAAC;CAChC;AAED,MAAM,WAAW,uBAAuB;IACtC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE;QACL,IAAI,EAAE,MAAM,CAAC;QACb,IAAI,EAAE,MAAM,CAAC;QACb,MAAM,EAAE,MAAM,CAAC;QACf,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;IACF,wBAAwB,EAAE,MAAM,CAAC;IACjC,eAAe,EAAE,MAAM,CAAC;CACzB;AAMD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAAwB;IACtC,OAAO,CAAC,KAAK,CAAiB;IAC9B,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,wBAAwB,CAAa;IAC7C,OAAO,CAAC,eAAe,CAAa;gBAExB,MAAM,EAAE,sBAAsB;IAmB1C;;OAEG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAoB5C;;;OAGG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAUtD;;OAEG;IACG,UAAU,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAQtD;;;;;;;OAOG;IACH,oBAAoB,CAAC,KAAK,EAAE,uBAAuB,GAAG,MAAM;IAoB5D;;OAEG;IACG,mBAAmB,CAAC,KAAK,EAAE,uBAAuB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAK5E;;OAEG;IACG,qBAAqB,CAAC,MAAM,EAAE,uBAAuB,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IASnF;;OAEG;IACG,6BAA6B,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAkBxF;;OAEG;IACG,yBAAyB,CAC7B,OAAO,EAAE,MAAM,EACf,KAAK,EAAE,uBAAuB,GAC7B,OAAO,CAAC,MAAM,EAAE,CAAC;IAMpB;;OAEG;IACG,+BAA+B,CACnC,OAAO,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,uBAAuB,CAAA;KAAE,CAAC,GAC7D,OAAO,CAAC,IAAI,CAAC;IA0BhB;;OAEG;IACG,2BAA2B,CAAC,KAAK,GAAE,MAAY,GAAG,OAAO,CAC7D,KAAK,CAAC;QACJ,EAAE,EAAE,MAAM,CAAC;QACX,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;QAC/B,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC;QACrB,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;KACpC,CAAC,CACH;IAwBD;;OAEG;IACG,kBAAkB,CACtB,SAAS,GAAE,MAAW,EACtB,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,M
AAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,GACtD,OAAO,CAAC;QAAE,SAAS,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAA;KAAE,CAAC;IAkEjD;;OAEG;IACG,WAAW,CACf,SAAS,EAAE,MAAM,EACjB,OAAO,GAAE;QACP,cAAc,CAAC,EAAE,MAAM,CAAC;QACxB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,YAAY,CAAC,EAAE,OAAO,CAAC;KACnB,GACL,OAAO,CACR,KAAK,CAAC;QACJ,EAAE,EAAE,MAAM,CAAC;QACX,eAAe,EAAE,MAAM,GAAG,IAAI,CAAC;QAC/B,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC;QACrB,cAAc,EAAE,MAAM,GAAG,IAAI,CAAC;QAC9B,YAAY,EAAE,MAAM,CAAC;QACrB,UAAU,EAAE,MAAM,CAAC;QACnB,eAAe,EAAE,MAAM,CAAC;QACxB,mBAAmB,EAAE,MAAM,GAAG,IAAI,CAAC;KACpC,CAAC,CACH;IA6CD;;OAEG;IACH,QAAQ,IAAI,cAAc;IAQ1B;;OAEG;IACH,UAAU,IAAI,IAAI;IAIlB;;OAEG;IACH,SAAS,IAAI;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE;CAGnD;AAQD;;GAEG;AACH,wBAAgB,mBAAmB,IAAI,gBAAgB,CAgCtD;AAED;;GAEG;AACH,wBAAgB,qBAAqB,IAAI,IAAI,CAE5C;AAGD,OAAO,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAC5C,OAAO,EAAE,qBAAqB,EAAE,KAAK,iBAAiB,EAAE,KAAK,sBAAsB,EAAE,MAAM,oBAAoB,CAAC"}
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Service Module
|
|
3
|
+
*
|
|
4
|
+
* Provides semantic embedding generation for the Gmail Knowledge Base.
|
|
5
|
+
* Uses the OpenAI text-embedding-3-small model with 1536-dimension embeddings by default (both are configurable).
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Single text and batch embedding generation
|
|
9
|
+
* - LRU cache with TTL for query embeddings
|
|
10
|
+
* - Embeddable text formatting for knowledge entries
|
|
11
|
+
* - Integration with Supabase for storing embeddings
|
|
12
|
+
*/
|
|
13
|
+
import { createClient } from '@supabase/supabase-js';
|
|
14
|
+
import { OpenAIEmbeddingClient } from './openai-client.js';
|
|
15
|
+
import { EmbeddingCache } from './cache.js';
|
|
16
|
+
import { EmbeddingError } from '../errors.js';
|
|
17
|
+
// =============================================================================
|
|
18
|
+
// Embedding Service
|
|
19
|
+
// =============================================================================
|
|
20
|
+
export class EmbeddingService {
|
|
21
|
+
client;
|
|
22
|
+
cache;
|
|
23
|
+
supabase;
|
|
24
|
+
totalEmbeddingsGenerated = 0;
|
|
25
|
+
totalTokensUsed = 0;
|
|
26
|
+
constructor(config) {
|
|
27
|
+
this.client = new OpenAIEmbeddingClient({
|
|
28
|
+
apiKey: config.openaiApiKey,
|
|
29
|
+
model: config.embeddingModel || 'text-embedding-3-small',
|
|
30
|
+
dimensions: config.embeddingDimensions || 1536,
|
|
31
|
+
});
|
|
32
|
+
this.cache = new EmbeddingCache(config.cacheMaxSize || 1000, config.cacheTtlMs || 300000 // 5 minutes default
|
|
33
|
+
);
|
|
34
|
+
this.supabase = createClient(config.supabaseUrl, config.supabaseServiceRoleKey);
|
|
35
|
+
}
|
|
36
|
+
// ===========================================================================
|
|
37
|
+
// Core Embedding Methods
|
|
38
|
+
// ===========================================================================
|
|
39
|
+
/**
|
|
40
|
+
* Generate embedding for a single text, using cache when available
|
|
41
|
+
*/
|
|
42
|
+
async embed(text) {
|
|
43
|
+
// Check cache first
|
|
44
|
+
const cached = this.cache.get(text);
|
|
45
|
+
if (cached) {
|
|
46
|
+
return cached;
|
|
47
|
+
}
|
|
48
|
+
// Generate new embedding
|
|
49
|
+
const response = await this.client.embed(text);
|
|
50
|
+
// Cache the result
|
|
51
|
+
this.cache.set(text, response.embedding);
|
|
52
|
+
// Track stats
|
|
53
|
+
this.totalEmbeddingsGenerated++;
|
|
54
|
+
this.totalTokensUsed += response.usage.totalTokens;
|
|
55
|
+
return response.embedding;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Generate embeddings for multiple texts in batch
|
|
59
|
+
* Does not use cache (typically for bulk operations)
|
|
60
|
+
*/
|
|
61
|
+
async embedBatch(texts) {
|
|
62
|
+
const response = await this.client.embedBatch(texts);
|
|
63
|
+
// Track stats
|
|
64
|
+
this.totalEmbeddingsGenerated += response.embeddings.length;
|
|
65
|
+
this.totalTokensUsed += response.usage.totalTokens;
|
|
66
|
+
return response.embeddings;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Generate embedding for a query (uses cache, intended for similarity search)
|
|
70
|
+
*/
|
|
71
|
+
async embedQuery(queryText) {
|
|
72
|
+
return this.embed(queryText);
|
|
73
|
+
}
|
|
74
|
+
// ===========================================================================
|
|
75
|
+
// Knowledge Entry Formatting
|
|
76
|
+
// ===========================================================================
|
|
77
|
+
/**
|
|
78
|
+
* Format a knowledge entry into embeddable text
|
|
79
|
+
*
|
|
80
|
+
* Structure:
|
|
81
|
+
* [QUESTION]: {subject} - {text}
|
|
82
|
+
* [RESPONSE]: {text}
|
|
83
|
+
* [OUTCOME]: {resolution}
|
|
84
|
+
*/
|
|
85
|
+
formatKnowledgeEntry(input) {
|
|
86
|
+
const parts = [];
|
|
87
|
+
// Question section
|
|
88
|
+
const questionPart = input.questionSubject
|
|
89
|
+
? `${input.questionSubject} - ${input.questionText}`
|
|
90
|
+
: input.questionText;
|
|
91
|
+
parts.push(`[QUESTION]: ${questionPart}`);
|
|
92
|
+
// Response section
|
|
93
|
+
parts.push(`[RESPONSE]: ${input.responseText}`);
|
|
94
|
+
// Outcome section (if available)
|
|
95
|
+
if (input.resolutionOutcome) {
|
|
96
|
+
parts.push(`[OUTCOME]: ${input.resolutionOutcome}`);
|
|
97
|
+
}
|
|
98
|
+
return parts.join('\n');
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Generate embedding for a knowledge entry
|
|
102
|
+
*/
|
|
103
|
+
async embedKnowledgeEntry(input) {
|
|
104
|
+
const formattedText = this.formatKnowledgeEntry(input);
|
|
105
|
+
return this.embed(formattedText);
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Generate embeddings for multiple knowledge entries
|
|
109
|
+
*/
|
|
110
|
+
async embedKnowledgeEntries(inputs) {
|
|
111
|
+
const formattedTexts = inputs.map((input) => this.formatKnowledgeEntry(input));
|
|
112
|
+
return this.embedBatch(formattedTexts);
|
|
113
|
+
}
|
|
114
|
+
// ===========================================================================
|
|
115
|
+
// Database Operations
|
|
116
|
+
// ===========================================================================
|
|
117
|
+
/**
|
|
118
|
+
* Update embedding for a Gmail knowledge entry
|
|
119
|
+
*/
|
|
120
|
+
async updateKnowledgeEntryEmbedding(entryId, embedding) {
|
|
121
|
+
const { error } = await this.supabase
|
|
122
|
+
.from('gmail_knowledge_entries')
|
|
123
|
+
.update({
|
|
124
|
+
embedding: `[${embedding.join(',')}]`,
|
|
125
|
+
updated_at: new Date().toISOString(),
|
|
126
|
+
})
|
|
127
|
+
.eq('id', entryId);
|
|
128
|
+
if (error) {
|
|
129
|
+
throw new EmbeddingError(`Failed to update embedding for entry ${entryId}: ${error.message}`, 'api_error', { context: { entryId, error } });
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Generate and store embedding for a Gmail knowledge entry
|
|
134
|
+
*/
|
|
135
|
+
async generateAndStoreEmbedding(entryId, input) {
|
|
136
|
+
const embedding = await this.embedKnowledgeEntry(input);
|
|
137
|
+
await this.updateKnowledgeEntryEmbedding(entryId, embedding);
|
|
138
|
+
return embedding;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Batch generate and store embeddings for multiple entries
|
|
142
|
+
*/
|
|
143
|
+
async generateAndStoreBatchEmbeddings(entries) {
|
|
144
|
+
if (entries.length === 0) {
|
|
145
|
+
return;
|
|
146
|
+
}
|
|
147
|
+
// Generate embeddings in batch
|
|
148
|
+
const inputs = entries.map((e) => e.input);
|
|
149
|
+
const embeddings = await this.embedKnowledgeEntries(inputs);
|
|
150
|
+
// Update each entry in the database
|
|
151
|
+
// Note: Supabase doesn't support batch updates with different values,
|
|
152
|
+
// so we do this in parallel with Promise.all
|
|
153
|
+
const updatePromises = entries.map((entry, index) => {
|
|
154
|
+
const embedding = embeddings[index];
|
|
155
|
+
if (!embedding) {
|
|
156
|
+
throw new EmbeddingError(`Missing embedding for entry at index ${index}`, 'validation');
|
|
157
|
+
}
|
|
158
|
+
return this.updateKnowledgeEntryEmbedding(entry.id, embedding);
|
|
159
|
+
});
|
|
160
|
+
await Promise.all(updatePromises);
|
|
161
|
+
}
|
|
162
|
+
/**
|
|
163
|
+
* Get entries that need embeddings
|
|
164
|
+
*/
|
|
165
|
+
async getEntriesNeedingEmbeddings(limit = 100) {
|
|
166
|
+
const { data, error } = await this.supabase
|
|
167
|
+
.from('gmail_knowledge_entries')
|
|
168
|
+
.select('id, question_subject, question_text, response_text, resolution_indicator')
|
|
169
|
+
.is('embedding', null)
|
|
170
|
+
.limit(limit);
|
|
171
|
+
if (error) {
|
|
172
|
+
throw new EmbeddingError(`Failed to fetch entries needing embeddings: ${error.message}`, 'api_error', { context: { error } });
|
|
173
|
+
}
|
|
174
|
+
return (data || []).map((row) => ({
|
|
175
|
+
id: row.id,
|
|
176
|
+
questionSubject: row.question_subject,
|
|
177
|
+
questionText: row.question_text,
|
|
178
|
+
responseText: row.response_text,
|
|
179
|
+
resolutionIndicator: row.resolution_indicator,
|
|
180
|
+
}));
|
|
181
|
+
}
|
|
182
|
+
/**
|
|
183
|
+
* Backfill embeddings for all entries without embeddings
|
|
184
|
+
*/
|
|
185
|
+
async backfillEmbeddings(batchSize = 50, onProgress) {
|
|
186
|
+
let processed = 0;
|
|
187
|
+
let errors = 0;
|
|
188
|
+
let total = 0;
|
|
189
|
+
// Get total count
|
|
190
|
+
const { count } = await this.supabase
|
|
191
|
+
.from('gmail_knowledge_entries')
|
|
192
|
+
.select('id', { count: 'exact', head: true })
|
|
193
|
+
.is('embedding', null);
|
|
194
|
+
total = count || 0;
|
|
195
|
+
while (true) {
|
|
196
|
+
const entries = await this.getEntriesNeedingEmbeddings(batchSize);
|
|
197
|
+
if (entries.length === 0) {
|
|
198
|
+
break;
|
|
199
|
+
}
|
|
200
|
+
try {
|
|
201
|
+
const batchEntries = entries.map((entry) => ({
|
|
202
|
+
id: entry.id,
|
|
203
|
+
input: {
|
|
204
|
+
questionSubject: entry.questionSubject || undefined,
|
|
205
|
+
questionText: entry.questionText,
|
|
206
|
+
responseText: entry.responseText,
|
|
207
|
+
resolutionOutcome: entry.resolutionIndicator || undefined,
|
|
208
|
+
},
|
|
209
|
+
}));
|
|
210
|
+
await this.generateAndStoreBatchEmbeddings(batchEntries);
|
|
211
|
+
processed += entries.length;
|
|
212
|
+
}
|
|
213
|
+
catch (error) {
|
|
214
|
+
console.error('Error processing batch:', error);
|
|
215
|
+
errors += entries.length;
|
|
216
|
+
// Try individual entries to isolate the problematic ones
|
|
217
|
+
for (const entry of entries) {
|
|
218
|
+
try {
|
|
219
|
+
await this.generateAndStoreEmbedding(entry.id, {
|
|
220
|
+
questionSubject: entry.questionSubject || undefined,
|
|
221
|
+
questionText: entry.questionText,
|
|
222
|
+
responseText: entry.responseText,
|
|
223
|
+
resolutionOutcome: entry.resolutionIndicator || undefined,
|
|
224
|
+
});
|
|
225
|
+
processed++;
|
|
226
|
+
errors--; // Reduce error count since this one succeeded
|
|
227
|
+
}
|
|
228
|
+
catch (individualError) {
|
|
229
|
+
console.error(`Error processing entry ${entry.id}:`, individualError);
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
if (onProgress) {
|
|
234
|
+
onProgress(processed, total);
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
return { processed, errors };
|
|
238
|
+
}
|
|
239
|
+
// ===========================================================================
|
|
240
|
+
// Similarity Search
|
|
241
|
+
// ===========================================================================
|
|
242
|
+
/**
|
|
243
|
+
* Find similar knowledge entries using vector similarity
|
|
244
|
+
*/
|
|
245
|
+
async findSimilar(queryText, options = {}) {
|
|
246
|
+
const { matchThreshold = 0.78, matchCount = 5, filterIntent = null, onlyApproved = true, } = options;
|
|
247
|
+
// Generate embedding for query
|
|
248
|
+
const queryEmbedding = await this.embedQuery(queryText);
|
|
249
|
+
// Call RPC function
|
|
250
|
+
const { data, error } = await this.supabase.rpc('match_gmail_knowledge', {
|
|
251
|
+
query_embedding: `[${queryEmbedding.join(',')}]`,
|
|
252
|
+
match_threshold: matchThreshold,
|
|
253
|
+
match_count: matchCount,
|
|
254
|
+
filter_intent: filterIntent,
|
|
255
|
+
only_approved: onlyApproved,
|
|
256
|
+
});
|
|
257
|
+
if (error) {
|
|
258
|
+
throw new EmbeddingError(`Similarity search failed: ${error.message}`, 'api_error', { context: { error } });
|
|
259
|
+
}
|
|
260
|
+
return (data || []).map((row) => ({
|
|
261
|
+
id: row.id,
|
|
262
|
+
questionSubject: row.question_subject,
|
|
263
|
+
questionText: row.question_text,
|
|
264
|
+
responseText: row.response_text,
|
|
265
|
+
intentCategory: row.intent_category,
|
|
266
|
+
qualityScore: Number(row.quality_score),
|
|
267
|
+
similarity: Number(row.similarity),
|
|
268
|
+
emailReceivedAt: row.email_received_at,
|
|
269
|
+
resolutionIndicator: row.resolution_indicator,
|
|
270
|
+
}));
|
|
271
|
+
}
|
|
272
|
+
// ===========================================================================
|
|
273
|
+
// Utility Methods
|
|
274
|
+
// ===========================================================================
|
|
275
|
+
/**
|
|
276
|
+
* Get service statistics
|
|
277
|
+
*/
|
|
278
|
+
getStats() {
|
|
279
|
+
return {
|
|
280
|
+
cache: this.cache.getStats(),
|
|
281
|
+
totalEmbeddingsGenerated: this.totalEmbeddingsGenerated,
|
|
282
|
+
totalTokensUsed: this.totalTokensUsed,
|
|
283
|
+
};
|
|
284
|
+
}
|
|
285
|
+
/**
|
|
286
|
+
* Clear the embedding cache
|
|
287
|
+
*/
|
|
288
|
+
clearCache() {
|
|
289
|
+
this.cache.clear();
|
|
290
|
+
}
|
|
291
|
+
/**
|
|
292
|
+
* Get embedding configuration
|
|
293
|
+
*/
|
|
294
|
+
getConfig() {
|
|
295
|
+
return this.client.getConfig();
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
// =============================================================================
|
|
299
|
+
// Singleton Instance
|
|
300
|
+
// =============================================================================
|
|
301
|
+
// Lazily created singleton; null until getEmbeddingService() succeeds.
let embeddingServiceInstance = null;
/**
 * Get the singleton embedding service instance, creating it from
 * environment variables on first use.
 *
 * Required: OPENAI_API_KEY, SUPABASE_SERVICE_ROLE_KEY, and SUPABASE_URL
 * (or NEXT_PUBLIC_SUPABASE_URL as a fallback).
 * Optional, with defaults: OPENAI_EMBEDDING_MODEL ('text-embedding-3-small'),
 * OPENAI_EMBEDDING_DIMENSIONS (1536), EMBEDDING_CACHE_MAX_SIZE (1000),
 * EMBEDDING_CACHE_TTL_MS (300000).
 *
 * @returns {EmbeddingService} The shared service instance.
 * @throws {EmbeddingError} With code 'validation' when a required variable
 *   is missing or a numeric variable does not parse as an integer.
 */
export function getEmbeddingService() {
    if (embeddingServiceInstance) {
        return embeddingServiceInstance;
    }
    const { env } = process;
    const openaiApiKey = env.OPENAI_API_KEY;
    const supabaseUrl = env.SUPABASE_URL || env.NEXT_PUBLIC_SUPABASE_URL;
    const supabaseServiceRoleKey = env.SUPABASE_SERVICE_ROLE_KEY;
    if (!openaiApiKey) {
        throw new EmbeddingError('OPENAI_API_KEY environment variable is required for embedding service', 'validation');
    }
    if (!supabaseUrl || !supabaseServiceRoleKey) {
        throw new EmbeddingError('SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY are required for embedding service', 'validation');
    }
    // Parse an integer setting. An unset or empty variable uses the
    // default; a value that parses to NaN is rejected here so that NaN
    // is never passed into the service configuration.
    const readIntEnv = (name, fallback) => {
        const parsed = Number.parseInt(env[name] || fallback, 10);
        if (Number.isNaN(parsed)) {
            throw new EmbeddingError(`${name} environment variable must be an integer`, 'validation');
        }
        return parsed;
    };
    embeddingServiceInstance = new EmbeddingService({
        openaiApiKey,
        supabaseUrl,
        supabaseServiceRoleKey,
        embeddingModel: env.OPENAI_EMBEDDING_MODEL || 'text-embedding-3-small',
        embeddingDimensions: readIntEnv('OPENAI_EMBEDDING_DIMENSIONS', '1536'),
        cacheMaxSize: readIntEnv('EMBEDDING_CACHE_MAX_SIZE', '1000'),
        cacheTtlMs: readIntEnv('EMBEDDING_CACHE_TTL_MS', '300000'),
    });
    return embeddingServiceInstance;
}
|
|
328
|
+
/**
|
|
329
|
+
* Reset the singleton (for testing)
|
|
330
|
+
*/
|
|
331
|
+
export function resetEmbeddingService() {
    // Drop the cached instance so that the next getEmbeddingService()
    // call constructs a new one.
    embeddingServiceInstance = null;
}
|
|
334
|
+
// Re-export types
|
|
335
|
+
export { EmbeddingCache } from './cache.js';
|
|
336
|
+
export { OpenAIEmbeddingClient } from './openai-client.js';
|
|
337
|
+
//# sourceMappingURL=index.js.map
|