holosphere 1.1.20 → 2.0.0-alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +36 -0
- package/.eslintrc.json +16 -0
- package/.prettierrc.json +7 -0
- package/LICENSE +162 -38
- package/README.md +483 -367
- package/bin/holosphere-activitypub.js +158 -0
- package/cleanup-test-data.js +204 -0
- package/examples/demo.html +1333 -0
- package/examples/example-bot.js +197 -0
- package/package.json +47 -87
- package/scripts/check-bundle-size.js +54 -0
- package/scripts/check-quest-ids.js +77 -0
- package/scripts/import-holons.js +578 -0
- package/scripts/publish-to-relay.js +101 -0
- package/scripts/read-example.js +186 -0
- package/scripts/relay-diagnostic.js +59 -0
- package/scripts/relay-example.js +179 -0
- package/scripts/resync-to-relay.js +245 -0
- package/scripts/revert-import.js +196 -0
- package/scripts/test-hybrid-mode.js +108 -0
- package/scripts/test-local-storage.js +63 -0
- package/scripts/test-nostr-direct.js +55 -0
- package/scripts/test-read-data.js +45 -0
- package/scripts/test-write-read.js +63 -0
- package/scripts/verify-import.js +95 -0
- package/scripts/verify-relay-data.js +139 -0
- package/src/ai/aggregation.js +319 -0
- package/src/ai/breakdown.js +511 -0
- package/src/ai/classifier.js +217 -0
- package/src/ai/council.js +228 -0
- package/src/ai/embeddings.js +279 -0
- package/src/ai/federation-ai.js +324 -0
- package/src/ai/h3-ai.js +955 -0
- package/src/ai/index.js +112 -0
- package/src/ai/json-ops.js +225 -0
- package/src/ai/llm-service.js +205 -0
- package/src/ai/nl-query.js +223 -0
- package/src/ai/relationships.js +353 -0
- package/src/ai/schema-extractor.js +218 -0
- package/src/ai/spatial.js +293 -0
- package/src/ai/tts.js +194 -0
- package/src/content/social-protocols.js +168 -0
- package/src/core/holosphere.js +273 -0
- package/src/crypto/secp256k1.js +259 -0
- package/src/federation/discovery.js +334 -0
- package/src/federation/hologram.js +1042 -0
- package/src/federation/registry.js +386 -0
- package/src/hierarchical/upcast.js +110 -0
- package/src/index.js +2669 -0
- package/src/schema/validator.js +91 -0
- package/src/spatial/h3-operations.js +110 -0
- package/src/storage/backend-factory.js +125 -0
- package/src/storage/backend-interface.js +142 -0
- package/src/storage/backends/activitypub/server.js +653 -0
- package/src/storage/backends/activitypub-backend.js +272 -0
- package/src/storage/backends/gundb-backend.js +233 -0
- package/src/storage/backends/nostr-backend.js +136 -0
- package/src/storage/filesystem-storage-browser.js +41 -0
- package/src/storage/filesystem-storage.js +138 -0
- package/src/storage/global-tables.js +81 -0
- package/src/storage/gun-async.js +281 -0
- package/src/storage/gun-wrapper.js +221 -0
- package/src/storage/indexeddb-storage.js +122 -0
- package/src/storage/key-storage-simple.js +76 -0
- package/src/storage/key-storage.js +136 -0
- package/src/storage/memory-storage.js +59 -0
- package/src/storage/migration.js +338 -0
- package/src/storage/nostr-async.js +811 -0
- package/src/storage/nostr-client.js +939 -0
- package/src/storage/nostr-wrapper.js +211 -0
- package/src/storage/outbox-queue.js +208 -0
- package/src/storage/persistent-storage.js +109 -0
- package/src/storage/sync-service.js +164 -0
- package/src/subscriptions/manager.js +142 -0
- package/test-ai-real-api.js +202 -0
- package/tests/unit/ai/aggregation.test.js +295 -0
- package/tests/unit/ai/breakdown.test.js +446 -0
- package/tests/unit/ai/classifier.test.js +294 -0
- package/tests/unit/ai/council.test.js +262 -0
- package/tests/unit/ai/embeddings.test.js +384 -0
- package/tests/unit/ai/federation-ai.test.js +344 -0
- package/tests/unit/ai/h3-ai.test.js +458 -0
- package/tests/unit/ai/index.test.js +304 -0
- package/tests/unit/ai/json-ops.test.js +307 -0
- package/tests/unit/ai/llm-service.test.js +390 -0
- package/tests/unit/ai/nl-query.test.js +383 -0
- package/tests/unit/ai/relationships.test.js +311 -0
- package/tests/unit/ai/schema-extractor.test.js +384 -0
- package/tests/unit/ai/spatial.test.js +279 -0
- package/tests/unit/ai/tts.test.js +279 -0
- package/tests/unit/content.test.js +332 -0
- package/tests/unit/contract/core.test.js +88 -0
- package/tests/unit/contract/crypto.test.js +198 -0
- package/tests/unit/contract/data.test.js +223 -0
- package/tests/unit/contract/federation.test.js +181 -0
- package/tests/unit/contract/hierarchical.test.js +113 -0
- package/tests/unit/contract/schema.test.js +114 -0
- package/tests/unit/contract/social.test.js +217 -0
- package/tests/unit/contract/spatial.test.js +110 -0
- package/tests/unit/contract/subscriptions.test.js +128 -0
- package/tests/unit/contract/utils.test.js +159 -0
- package/tests/unit/core.test.js +152 -0
- package/tests/unit/crypto.test.js +328 -0
- package/tests/unit/federation.test.js +234 -0
- package/tests/unit/gun-async.test.js +252 -0
- package/tests/unit/hierarchical.test.js +399 -0
- package/tests/unit/integration/scenario-01-geographic-storage.test.js +74 -0
- package/tests/unit/integration/scenario-02-federation.test.js +76 -0
- package/tests/unit/integration/scenario-03-subscriptions.test.js +102 -0
- package/tests/unit/integration/scenario-04-validation.test.js +129 -0
- package/tests/unit/integration/scenario-05-hierarchy.test.js +125 -0
- package/tests/unit/integration/scenario-06-social.test.js +135 -0
- package/tests/unit/integration/scenario-07-persistence.test.js +130 -0
- package/tests/unit/integration/scenario-08-authorization.test.js +161 -0
- package/tests/unit/integration/scenario-09-cross-dimensional.test.js +139 -0
- package/tests/unit/integration/scenario-10-cross-holosphere-capabilities.test.js +357 -0
- package/tests/unit/integration/scenario-11-cross-holosphere-federation.test.js +410 -0
- package/tests/unit/integration/scenario-12-capability-federated-read.test.js +719 -0
- package/tests/unit/performance/benchmark.test.js +85 -0
- package/tests/unit/schema.test.js +213 -0
- package/tests/unit/spatial.test.js +158 -0
- package/tests/unit/storage.test.js +195 -0
- package/tests/unit/subscriptions.test.js +328 -0
- package/tests/unit/test-data-permanence-debug.js +197 -0
- package/tests/unit/test-data-permanence.js +340 -0
- package/tests/unit/test-key-persistence-fixed.js +148 -0
- package/tests/unit/test-key-persistence.js +172 -0
- package/tests/unit/test-relay-permanence.js +376 -0
- package/tests/unit/test-second-node.js +95 -0
- package/tests/unit/test-simple-write.js +89 -0
- package/vite.config.js +49 -0
- package/vitest.config.js +20 -0
- package/FEDERATION.md +0 -213
- package/compute.js +0 -298
- package/content.js +0 -980
- package/federation.js +0 -1234
- package/global.js +0 -736
- package/hexlib.js +0 -335
- package/hologram.js +0 -183
- package/holosphere-bundle.esm.js +0 -33256
- package/holosphere-bundle.js +0 -33287
- package/holosphere-bundle.min.js +0 -39
- package/holosphere.d.ts +0 -601
- package/holosphere.js +0 -719
- package/node.js +0 -246
- package/schema.js +0 -139
- package/utils.js +0 -302
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach, vi } from 'vitest';
|
|
2
|
+
import { Embeddings } from '../../../src/ai/embeddings.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Unit tests for the Embeddings helper (src/ai/embeddings.js).
 *
 * The OpenAI client and the HoloSphere instance are replaced with vitest
 * mocks, so nothing in this suite touches the network or real storage.
 */
describe('Unit: Embeddings', () => {
  // Vector length used by every fixture; equals the default `dimensions`
  // value asserted in the constructor test below.
  const DIM = 1536;

  /** Builds a DIM-length vector whose components all equal `value`. */
  const uniformVector = (value) => Array(DIM).fill(value);

  /**
   * Builds a DIM-length vector whose first half is `head` and second half is
   * `tail`. Unlike uniform vectors (which are all parallel to each other),
   * split vectors with different head/tail ratios point in different
   * directions, so they give distinct cosine similarities.
   */
  const splitVector = (head, tail) =>
    Array.from({ length: DIM }, (_, i) => (i < DIM / 2 ? head : tail));

  let embeddings;
  let mockOpenAI;
  let mockHolosphere;

  // Fresh mocks and a fresh Embeddings instance for every test.
  beforeEach(() => {
    vi.clearAllMocks();

    mockOpenAI = {
      embeddings: {
        create: vi.fn()
      }
    };

    mockHolosphere = {
      put: vi.fn().mockResolvedValue({ success: true }),
      getAll: vi.fn().mockResolvedValue([])
    };

    embeddings = new Embeddings(mockOpenAI, mockHolosphere);
  });

  describe('Constructor', () => {
    it('should initialize with OpenAI client', () => {
      const emb = new Embeddings(mockOpenAI);
      expect(emb.openai).toBe(mockOpenAI);
      expect(emb.model).toBe('text-embedding-3-small');
      expect(emb.dimensions).toBe(DIM);
    });

    it('should accept optional HoloSphere instance', () => {
      const emb = new Embeddings(mockOpenAI, mockHolosphere);
      expect(emb.holosphere).toBe(mockHolosphere);
    });

    it('should work without HoloSphere instance', () => {
      const emb = new Embeddings(mockOpenAI);
      expect(emb.holosphere).toBeNull();
    });
  });

  describe('setHoloSphere', () => {
    it('should set HoloSphere instance', () => {
      const emb = new Embeddings(mockOpenAI);
      emb.setHoloSphere(mockHolosphere);
      expect(emb.holosphere).toBe(mockHolosphere);
    });
  });

  describe('embed', () => {
    it('should generate embedding for text', async () => {
      const mockEmbedding = Array.from({ length: DIM }, (_, i) => i / DIM);
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: mockEmbedding }]
      });

      const result = await embeddings.embed('Hello world');

      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: 'text-embedding-3-small',
        input: 'Hello world'
      });
      expect(result).toEqual(mockEmbedding);
      expect(result).toHaveLength(DIM);
    });

    it('should throw error on API failure', async () => {
      mockOpenAI.embeddings.create.mockRejectedValue(new Error('API Error'));

      await expect(embeddings.embed('Test'))
        .rejects.toThrow('Embedding generation failed: API Error');
    });
  });

  describe('embedBatch', () => {
    it('should generate embeddings for multiple texts', async () => {
      const mockEmbeddings = [
        { embedding: uniformVector(0.1) },
        { embedding: uniformVector(0.2) },
        { embedding: uniformVector(0.3) }
      ];
      mockOpenAI.embeddings.create.mockResolvedValue({ data: mockEmbeddings });

      const result = await embeddings.embedBatch(['text1', 'text2', 'text3']);

      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: 'text-embedding-3-small',
        input: ['text1', 'text2', 'text3']
      });
      expect(result).toHaveLength(3);
      expect(result[0]).toEqual(uniformVector(0.1));
    });

    it('should throw error on batch failure', async () => {
      mockOpenAI.embeddings.create.mockRejectedValue(new Error('Batch failed'));

      await expect(embeddings.embedBatch(['a', 'b']))
        .rejects.toThrow('Batch embedding failed: Batch failed');
    });
  });

  describe('cosineSimilarity', () => {
    it('should calculate cosine similarity between vectors', () => {
      const vec1 = [1, 0, 0];
      const vec2 = [1, 0, 0];

      const similarity = embeddings.cosineSimilarity(vec1, vec2);
      expect(similarity).toBeCloseTo(1.0, 5);
    });

    it('should return 0 for orthogonal vectors', () => {
      const vec1 = [1, 0, 0];
      const vec2 = [0, 1, 0];

      const similarity = embeddings.cosineSimilarity(vec1, vec2);
      expect(similarity).toBeCloseTo(0, 5);
    });

    it('should return -1 for opposite vectors', () => {
      const vec1 = [1, 0, 0];
      const vec2 = [-1, 0, 0];

      const similarity = embeddings.cosineSimilarity(vec1, vec2);
      expect(similarity).toBeCloseTo(-1.0, 5);
    });

    it('should throw error for vectors of different lengths', () => {
      const vec1 = [1, 2, 3];
      const vec2 = [1, 2];

      expect(() => embeddings.cosineSimilarity(vec1, vec2))
        .toThrow('Vectors must have same length');
    });

    it('should handle normalized vectors correctly', () => {
      const vec1 = [0.6, 0.8, 0];
      const vec2 = [0.6, 0.8, 0];

      const similarity = embeddings.cosineSimilarity(vec1, vec2);
      expect(similarity).toBeCloseTo(1.0, 5);
    });
  });

  describe('storeWithEmbedding', () => {
    it('should store item with generated embedding', async () => {
      const mockEmbedding = uniformVector(0.5);
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: mockEmbedding }]
      });

      const item = { content: 'Test content', title: 'Test' };
      const result = await embeddings.storeWithEmbedding('holon1', 'lens1', item);

      expect(result._embedding).toEqual(mockEmbedding);
      expect(result._embeddedField).toBe('auto');
      expect(result._embeddedAt).toBeDefined();
      expect(mockHolosphere.put).toHaveBeenCalledWith('holon1', 'lens1', expect.objectContaining({
        content: 'Test content',
        _embedding: mockEmbedding
      }));
    });

    it('should use specified text field for embedding', async () => {
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: uniformVector(0) }]
      });

      const item = { title: 'Title', description: 'Description text' };
      await embeddings.storeWithEmbedding('holon1', 'lens1', item, 'description');

      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: 'text-embedding-3-small',
        input: 'Description text'
      });
    });

    it('should throw error if HoloSphere not available', async () => {
      const emb = new Embeddings(mockOpenAI);

      await expect(emb.storeWithEmbedding('holon', 'lens', { text: 'test' }))
        .rejects.toThrow('HoloSphere instance required for storage');
    });

    it('should throw error if no text found to embed', async () => {
      const item = { id: 123, number: 456 };

      await expect(embeddings.storeWithEmbedding('holon', 'lens', item, 'missing'))
        .rejects.toThrow('No text found to embed');
    });

    it('should fall back to JSON stringify for objects without text fields', async () => {
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: uniformVector(0) }]
      });

      const item = { id: 123, data: [1, 2, 3] };
      await embeddings.storeWithEmbedding('holon', 'lens', item);

      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: 'text-embedding-3-small',
        input: JSON.stringify(item)
      });
    });
  });

  describe('semanticSearch', () => {
    it('should search for similar items using embeddings', async () => {
      const queryEmbedding = uniformVector(0.5);
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: queryEmbedding }]
      });

      // Cosine similarity ignores magnitude, so two uniform positive vectors
      // always score ~1 against each other. The "less similar" item therefore
      // uses a different direction (cosine ~0.83 against the query) so the
      // ordering assertion does not hinge on floating-point rounding.
      const storedItems = [
        { id: 1, text: 'Similar item', _embedding: uniformVector(0.5) },
        { id: 2, text: 'Less similar', _embedding: splitVector(0.5, 0.1) },
        { id: 3, text: 'No embedding' }
      ];
      mockHolosphere.getAll.mockResolvedValue(storedItems);

      // Explicit threshold below both scores keeps this test independent of
      // whatever default threshold the implementation applies.
      const results = await embeddings.semanticSearch('query', 'holon1', 'lens1', { threshold: 0.5 });

      expect(results).toHaveLength(2); // Only items with embeddings
      expect(results[0].item.id).toBe(1);
      expect(results[0].similarity).toBeGreaterThan(results[1].similarity);
      expect(results[0].item._embedding).toBeUndefined(); // Embedding not returned
    });

    it('should respect threshold option', async () => {
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: uniformVector(1) }]
      });

      // Alternating signs cancel out in the dot product with the all-ones
      // query, giving cosine similarity 0, well under the 0.8 threshold.
      const orthogonalToQuery = Array.from(
        { length: DIM },
        (_, i) => (i % 2 === 0 ? 0.1 : -0.1)
      );

      mockHolosphere.getAll.mockResolvedValue([
        { id: 1, _embedding: uniformVector(0.9) }, // Parallel to query: similarity ~1
        { id: 2, _embedding: orthogonalToQuery } // Orthogonal to query: similarity 0
      ]);

      const results = await embeddings.semanticSearch('query', 'holon', 'lens', { threshold: 0.8 });

      expect(results).toHaveLength(1);
      expect(results[0].item.id).toBe(1);
      results.forEach(r => expect(r.similarity).toBeGreaterThanOrEqual(0.8));
    });

    it('should respect limit option', async () => {
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: uniformVector(0.5) }]
      });

      const manyItems = Array.from({ length: 20 }, (_, i) => ({
        id: i,
        _embedding: uniformVector(0.5)
      }));
      mockHolosphere.getAll.mockResolvedValue(manyItems);

      const results = await embeddings.semanticSearch('query', 'holon', 'lens', { limit: 5 });

      expect(results).toHaveLength(5);
    });

    it('should throw error if HoloSphere not available', async () => {
      const emb = new Embeddings(mockOpenAI);

      await expect(emb.semanticSearch('query', 'holon', 'lens'))
        .rejects.toThrow('HoloSphere instance required for search');
    });
  });

  describe('findSimilar', () => {
    it('should find items similar to given item', async () => {
      mockOpenAI.embeddings.create.mockResolvedValue({
        data: [{ embedding: uniformVector(0.5) }]
      });

      mockHolosphere.getAll.mockResolvedValue([
        { id: 1, _embedding: uniformVector(0.5) }
      ]);

      const item = { content: 'Test item' };
      const results = await embeddings.findSimilar(item, 'holon', 'lens');

      expect(mockOpenAI.embeddings.create).toHaveBeenCalledWith({
        model: 'text-embedding-3-small',
        input: 'Test item'
      });
      expect(results).toBeDefined();
    });
  });

  describe('cluster', () => {
    it('should cluster items by semantic similarity', async () => {
      const items = [
        { id: 1, text: 'Item 1' },
        { id: 2, text: 'Item 2' },
        { id: 3, text: 'Item 3' }
      ];

      // Mock embeddings for each item
      mockOpenAI.embeddings.create
        .mockResolvedValueOnce({ data: [{ embedding: uniformVector(0.1) }] })
        .mockResolvedValueOnce({ data: [{ embedding: uniformVector(0.5) }] })
        .mockResolvedValueOnce({ data: [{ embedding: uniformVector(0.9) }] });

      const clusters = await embeddings.cluster(items, 2);

      expect(Array.isArray(clusters)).toBe(true);
      expect(clusters.length).toBeLessThanOrEqual(2);
      // Embeddings should not be included in output
      clusters.forEach(cluster => {
        cluster.forEach(item => {
          expect(item._embedding).toBeUndefined();
        });
      });
    });

    it('should use existing embeddings if available', async () => {
      const items = [
        { id: 1, _embedding: uniformVector(0.1) },
        { id: 2, _embedding: uniformVector(0.2) }
      ];

      const clusters = await embeddings.cluster(items, 2);

      expect(mockOpenAI.embeddings.create).not.toHaveBeenCalled();
      expect(clusters).toBeDefined();
    });

    it('should return individual items as clusters when fewer items than k', async () => {
      const items = [
        { id: 1, _embedding: uniformVector(0.1) },
        { id: 2, _embedding: uniformVector(0.2) }
      ];

      const clusters = await embeddings.cluster(items, 5);

      expect(clusters).toHaveLength(2);
      expect(clusters[0]).toHaveLength(1);
      expect(clusters[1]).toHaveLength(1);
    });
  });

  describe('_kMeans', () => {
    it('should perform k-means clustering', () => {
      // Two well-separated pairs: items 1-2 are weighted towards the first
      // half of the vector, items 3-4 towards the second half.
      const items = [
        { id: 1, _embedding: splitVector(0.9, 0.1) },
        { id: 2, _embedding: splitVector(0.85, 0.15) },
        { id: 3, _embedding: splitVector(0.1, 0.9) },
        { id: 4, _embedding: splitVector(0.15, 0.85) }
      ];

      const clusters = embeddings._kMeans(items, 2);

      expect(clusters).toHaveLength(2);
      // NOTE(review): the intended outcome is two clusters of two items each
      // (the similar pairs), but per-cluster sizes are not asserted here.
      // Confirm whether _kMeans centroid initialisation is deterministic
      // before pinning cluster membership.
    });

    it('should handle items less than k', () => {
      const items = [
        { id: 1, _embedding: uniformVector(0.5) }
      ];

      const clusters = embeddings._kMeans(items, 3);

      expect(clusters).toHaveLength(1);
    });

    it('should converge within max iterations', () => {
      // Seeded linear congruential generator instead of Math.random(), so the
      // fixture is identical on every run and a failure can be reproduced.
      let seed = 42;
      const nextValue = () => {
        seed = (seed * 1664525 + 1013904223) % 4294967296;
        return seed / 4294967296;
      };

      const items = Array.from({ length: 10 }, (_, i) => ({
        id: i,
        _embedding: Array.from({ length: DIM }, () => nextValue())
      }));

      const clusters = embeddings._kMeans(items, 3, 10);

      expect(clusters.length).toBeGreaterThan(0);
      expect(clusters.length).toBeLessThanOrEqual(3);
    });
  });
});
|