@tamyla/clodo-framework 4.3.4 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +34 -8
- package/dist/utilities/ai/client.js +276 -0
- package/dist/utilities/ai/index.js +6 -0
- package/dist/utilities/analytics/index.js +6 -0
- package/dist/utilities/analytics/writer.js +226 -0
- package/dist/utilities/bindings/client.js +283 -0
- package/dist/utilities/bindings/index.js +6 -0
- package/dist/utilities/cache/index.js +9 -0
- package/dist/utilities/cache/leaderboard.js +52 -0
- package/dist/utilities/cache/rate-limiter.js +57 -0
- package/dist/utilities/cache/session.js +69 -0
- package/dist/utilities/cache/upstash.js +200 -0
- package/dist/utilities/durable-objects/base.js +200 -0
- package/dist/utilities/durable-objects/counter.js +117 -0
- package/dist/utilities/durable-objects/index.js +10 -0
- package/dist/utilities/durable-objects/rate-limiter.js +80 -0
- package/dist/utilities/durable-objects/session-store.js +126 -0
- package/dist/utilities/durable-objects/websocket-room.js +223 -0
- package/dist/utilities/email/handler.js +359 -0
- package/dist/utilities/email/index.js +6 -0
- package/dist/utilities/index.js +65 -0
- package/dist/utilities/kv/index.js +6 -0
- package/dist/utilities/kv/storage.js +268 -0
- package/dist/utilities/queues/consumer.js +188 -0
- package/dist/utilities/queues/index.js +7 -0
- package/dist/utilities/queues/producer.js +74 -0
- package/dist/utilities/scheduled/handler.js +276 -0
- package/dist/utilities/scheduled/index.js +6 -0
- package/dist/utilities/storage/index.js +6 -0
- package/dist/utilities/storage/r2.js +314 -0
- package/dist/utilities/vectorize/index.js +6 -0
- package/dist/utilities/vectorize/store.js +273 -0
- package/package.json +21 -3
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vectorize Store Utilities
|
|
3
|
+
* Vector database operations for semantic search and RAG applications
|
|
4
|
+
*
|
|
5
|
+
* @example
|
|
6
|
+
* import { VectorStore, EmbeddingHelper } from '@tamyla/clodo-framework/utilities/vectorize';
|
|
7
|
+
* import { AIClient } from '@tamyla/clodo-framework/utilities/ai';
|
|
8
|
+
*
|
|
9
|
+
* const ai = new AIClient(env.AI);
|
|
10
|
+
* const vectors = new VectorStore(env.VECTORIZE_INDEX);
|
|
11
|
+
* const helper = new EmbeddingHelper(ai);
|
|
12
|
+
*
|
|
13
|
+
* // Store a document
|
|
14
|
+
* const embedding = await helper.embed('Document text...');
|
|
15
|
+
* await vectors.insert([{ id: 'doc-1', values: embedding, metadata: { title: 'Doc 1' } }]);
|
|
16
|
+
*
|
|
17
|
+
* // Search
|
|
18
|
+
* const queryEmbedding = await helper.embed('Search query');
|
|
19
|
+
* const results = await vectors.query(queryEmbedding, { topK: 10 });
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
/**
 * Vector Store wrapper for Cloudflare Vectorize.
 *
 * Thin facade over a Vectorize index binding: every method delegates to the
 * binding method of the same name and returns its result unchanged.
 */
export class VectorStore {
  /**
   * @param {VectorizeIndex} index - Vectorize index binding
   * @throws {Error} If no binding is supplied
   */
  constructor(index) {
    if (!index) {
      throw new Error('Vectorize index binding is required');
    }
    this.index = index;
  }

  /**
   * Insert vectors into the index
   * @param {Array<{id: string, values: number[], metadata?: Object, namespace?: string}>} vectors
   * @returns {Promise<Object>} The binding's insert() result, passed through unchanged
   */
  async insert(vectors) {
    return this.index.insert(toVectorizeRecords(vectors));
  }

  /**
   * Upsert vectors (insert or update)
   * @param {Array<{id: string, values: number[], metadata?: Object, namespace?: string}>} vectors
   * @returns {Promise<Object>} The binding's upsert() result, passed through unchanged
   */
  async upsert(vectors) {
    return this.index.upsert(toVectorizeRecords(vectors));
  }

  /**
   * Query for similar vectors
   * @param {number[]} vector - Query vector
   * @param {Object} options - Query options
   * @param {number} options.topK - Number of results (default: 10)
   * @param {Object} options.filter - Metadata filter
   * @param {boolean} options.returnValues - Include vector values in response (default: false)
   * @param {boolean} options.returnMetadata - Include metadata in response (default: true)
   * @param {string} [options.namespace] - Restrict the search to one namespace
   * @returns {Promise<{matches: Array}>}
   */
  async query(vector, options = {}) {
    const queryOptions = {
      topK: options.topK || 10,
      filter: options.filter,
      returnValues: options.returnValues ?? false,
      returnMetadata: options.returnMetadata ?? true
    };
    // Only forward a namespace the caller actually asked for, so the options
    // sent for non-namespaced queries stay exactly as they were.
    if (options.namespace != null) {
      queryOptions.namespace = options.namespace;
    }
    return this.index.query(vector, queryOptions);
  }

  /**
   * Get vectors by IDs
   * @param {string[]} ids - Vector IDs to retrieve
   * @returns {Promise<Array>}
   */
  async getByIds(ids) {
    return this.index.getByIds(ids);
  }

  /**
   * Delete vectors by IDs
   * @param {string[]} ids - Vector IDs to delete
   * @returns {Promise<Object>} The binding's deleteByIds() result, passed through unchanged
   */
  async deleteByIds(ids) {
    return this.index.deleteByIds(ids);
  }

  /**
   * Get index info
   * @returns {Promise<Object>}
   */
  async describe() {
    return this.index.describe();
  }
}

/**
 * Normalise caller-supplied vectors into the record shape sent to the binding.
 * Shared by insert() and upsert() so both apply the same rules: metadata
 * defaults to an empty object, and namespace is kept only when provided.
 * @param {Array<{id: string, values: number[], metadata?: Object, namespace?: string}>} vectors
 * @returns {Array<Object>}
 */
function toVectorizeRecords(vectors) {
  return vectors.map(v => {
    const record = {
      id: v.id,
      values: v.values,
      metadata: v.metadata || {}
    };
    if (v.namespace != null) {
      record.namespace = v.namespace;
    }
    return record;
  });
}
|
|
109
|
+
|
|
110
|
+
/**
 * Semantic search helper combining AI embeddings with Vectorize.
 *
 * Text is turned into vectors through `aiClient.embed(...)`, whose result is
 * read as an array of embeddings (one per input text), and stored/queried
 * through the supplied vector store.
 */
export class VectorSearch {
  /**
   * @param {VectorStore} vectorStore - Vector store instance
   * @param {AIClient} aiClient - AI client for embeddings
   */
  constructor(vectorStore, aiClient) {
    this.store = vectorStore;
    this.ai = aiClient;
  }

  /**
   * Search with natural language query
   * @param {string} query - Search query
   * @param {Object} options - Search options
   * @param {number} [options.topK] - Number of results (default: 10)
   * @param {Object} [options.filter] - Metadata filter forwarded to the store
   * @returns {Promise<Array<{id: string, score: number, metadata: Object}>>}
   */
  async search(query, options = {}) {
    // Generate embedding for query
    const embeddings = await this.ai.embed(query);
    const queryVector = embeddings[0];

    // Query vector store
    const results = await this.store.query(queryVector, {
      topK: options.topK || 10,
      filter: options.filter,
      returnMetadata: true
    });

    // Expose only the fields callers need, not the raw match objects
    return results.matches.map(match => ({
      id: match.id,
      score: match.score,
      metadata: match.metadata
    }));
  }

  /**
   * Index a document
   * @param {string} id - Document ID
   * @param {string} text - Document text
   * @param {Object} metadata - Additional metadata; a `textPreview` key is
   *   always overwritten with the first 200 characters of `text`
   * @returns {Promise<void>}
   */
  async indexDocument(id, text, metadata = {}) {
    const embeddings = await this.ai.embed(text);
    await this.store.upsert([{
      id,
      values: embeddings[0],
      metadata: {
        ...metadata,
        textPreview: text.slice(0, 200)
      }
    }]);
  }

  /**
   * Index multiple documents
   * @param {Array<{id: string, text: string, metadata?: Object}>} documents
   * @returns {Promise<void>}
   * @throws {Error} If the AI client does not return one embedding per document
   */
  async indexDocuments(documents) {
    // Nothing to index: skip the embedding request and the upsert entirely
    if (documents.length === 0) {
      return;
    }

    // Batch embed all texts
    const texts = documents.map(d => d.text);
    const embeddings = await this.ai.embed(texts);

    // A short result would otherwise be upserted as vectors without values
    const received = embeddings?.length ?? 0;
    if (received !== documents.length) {
      throw new Error(`Embedding count mismatch: expected ${documents.length}, received ${received}`);
    }

    const vectors = documents.map((doc, i) => ({
      id: doc.id,
      values: embeddings[i],
      metadata: {
        ...doc.metadata,
        textPreview: doc.text.slice(0, 200)
      }
    }));
    await this.store.upsert(vectors);
  }

  /**
   * Find similar documents
   * @param {string} documentId - Source document ID
   * @param {Object} options - Search options
   * @param {number} [options.topK] - Number of results (default: 10)
   * @param {Object} [options.filter] - Metadata filter forwarded to the store
   * @returns {Promise<Array>} Raw matches from the store, without the source
   *   document; empty when the source document is not found
   */
  async findSimilar(documentId, options = {}) {
    const [doc] = await this.store.getByIds([documentId]);
    if (!doc) return [];

    const limit = options.topK || 10;
    const results = await this.store.query(doc.values, {
      // +1 to exclude self
      topK: limit + 1,
      filter: options.filter
    });

    // Filter out the source document
    return results.matches.filter(m => m.id !== documentId).slice(0, limit);
  }
}
|
|
202
|
+
|
|
203
|
+
/**
 * Helper for managing embeddings: embeds text through an AI client with a
 * fixed model and splits long documents into overlapping chunks.
 */
export class EmbeddingHelper {
  /**
   * @param {AIClient} aiClient - AI client exposing `embed(text, { model })`
   * @param {Object} [options]
   * @param {string} [options.model] - Embedding model (default: '@cf/baai/bge-base-en-v1.5')
   * @param {number} [options.dimensions] - Embedding dimensions (default: 768);
   *   stored on the instance but not otherwise used by this class
   */
  constructor(aiClient, options = {}) {
    this.ai = aiClient;
    this.model = options.model || '@cf/baai/bge-base-en-v1.5';
    this.dimensions = options.dimensions || 768;
  }

  /**
   * Generate embedding for text
   * @param {string|string[]} text - Text to embed
   * @returns {Promise<number[]|number[][]>} A single embedding for a string
   *   input, or the client's full result for an array input
   */
  async embed(text) {
    const embeddings = await this.ai.embed(text, {
      model: this.model
    });
    return Array.isArray(text) ? embeddings : embeddings[0];
  }

  /**
   * Chunk text for long documents
   * @param {string} text - Text to chunk
   * @param {Object} options - Chunking options
   * @param {number} [options.chunkSize] - Maximum characters per chunk (default: 500)
   * @param {number} [options.overlap] - Characters shared between consecutive
   *   chunks (default: 50); 0 produces back-to-back chunks
   * @returns {Array<{text: string, start: number, end: number}>}
   * @throws {RangeError} If the text needs more than one chunk and the overlap
   *   is not smaller than the chunk size, so chunking could never finish
   */
  chunkText(text, options = {}) {
    const chunkSize = options.chunkSize || 500;
    // `??` rather than `||` so that an explicit overlap of 0 is honoured
    const overlap = options.overlap ?? 50;
    const chunks = [];
    let start = 0;
    while (start < text.length) {
      const end = Math.min(start + chunkSize, text.length);
      chunks.push({
        text: text.slice(start, end),
        start,
        end
      });
      // The chunk just pushed reached the end of the text: done
      if (end >= text.length) break;

      // Without forward progress the loop would add chunks forever
      const next = end - overlap;
      if (!(next > start)) {
        throw new RangeError(`chunkText: overlap (${overlap}) must be smaller than chunkSize (${chunkSize})`);
      }
      start = next;
    }
    return chunks;
  }

  /**
   * Embed long document with chunking
   *
   * Builds one vector record per chunk, with id `<documentId>#chunk-<index>`.
   * The records are returned, not stored.
   * @param {string} documentId - Document ID
   * @param {string} text - Document text
   * @param {Object} options - Options, passed to chunkText()
   * @returns {Promise<Array<{id: string, values: number[], metadata: Object}>>}
   */
  async embedDocument(documentId, text, options = {}) {
    const chunks = this.chunkText(text, options);
    const vectors = await Promise.all(chunks.map(async (chunk, i) => {
      const embedding = await this.embed(chunk.text);
      return {
        id: `${documentId}#chunk-${i}`,
        values: embedding,
        metadata: {
          documentId,
          chunkIndex: i,
          start: chunk.start,
          end: chunk.end,
          text: chunk.text
        }
      };
    }));
    return vectors;
  }
}
|
|
273
|
+
export default VectorStore;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tamyla/clodo-framework",
|
|
3
|
-
"version": "4.3.4",
|
|
3
|
+
"version": "4.4.0",
|
|
4
4
|
"description": "Reusable framework for Clodo-style software architecture on Cloudflare Workers + D1",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": [
|
|
@@ -37,7 +37,19 @@
|
|
|
37
37
|
"./errors": "./dist/errors/index.js",
|
|
38
38
|
"./testing": "./dist/testing/index.js",
|
|
39
39
|
"./middleware": "./dist/middleware/index.js",
|
|
40
|
-
"./modules/security": "./dist/modules/security.js"
|
|
40
|
+
"./modules/security": "./dist/modules/security.js",
|
|
41
|
+
"./utilities": "./dist/utilities/index.js",
|
|
42
|
+
"./utilities/storage": "./dist/utilities/storage/r2.js",
|
|
43
|
+
"./utilities/kv": "./dist/utilities/kv/storage.js",
|
|
44
|
+
"./utilities/durable-objects": "./dist/utilities/durable-objects/index.js",
|
|
45
|
+
"./utilities/queues": "./dist/utilities/queues/index.js",
|
|
46
|
+
"./utilities/ai": "./dist/utilities/ai/client.js",
|
|
47
|
+
"./utilities/vectorize": "./dist/utilities/vectorize/store.js",
|
|
48
|
+
"./utilities/cache": "./dist/utilities/cache/index.js",
|
|
49
|
+
"./utilities/email": "./dist/utilities/email/handler.js",
|
|
50
|
+
"./utilities/scheduled": "./dist/utilities/scheduled/handler.js",
|
|
51
|
+
"./utilities/analytics": "./dist/utilities/analytics/writer.js",
|
|
52
|
+
"./utilities/bindings": "./dist/utilities/bindings/client.js"
|
|
41
53
|
},
|
|
42
54
|
"bin": {
|
|
43
55
|
"clodo-service": "./dist/cli/clodo-service.js",
|
|
@@ -191,7 +203,13 @@
|
|
|
191
203
|
"url": "git+https://github.com/tamylaa/clodo-framework.git"
|
|
192
204
|
},
|
|
193
205
|
"bugs": {
|
|
194
|
-
"url": "https://github.com/tamylaa/clodo-framework/issues"
|
|
206
|
+
"url": "https://github.com/tamylaa/clodo-framework/issues",
|
|
207
|
+
"email": "product@clodo.dev"
|
|
208
|
+
},
|
|
209
|
+
"support": "https://github.com/tamylaa/clodo-framework/blob/main/SUPPORT.md",
|
|
210
|
+
"funding": {
|
|
211
|
+
"type": "github",
|
|
212
|
+
"url": "https://github.com/sponsors/tamylaa"
|
|
195
213
|
},
|
|
196
214
|
"homepage": "https://clodo.dev",
|
|
197
215
|
"release": {
|