@skillsmith/core 2.1.0 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -1
- package/dist/src/analysis/types.d.ts +2 -0
- package/dist/src/analysis/types.d.ts.map +1 -1
- package/dist/src/analysis/types.js +13 -1
- package/dist/src/analysis/types.js.map +1 -1
- package/dist/src/analytics/AnalyticsRepository.d.ts +4 -0
- package/dist/src/analytics/AnalyticsRepository.d.ts.map +1 -1
- package/dist/src/analytics/AnalyticsRepository.js +26 -44
- package/dist/src/analytics/AnalyticsRepository.js.map +1 -1
- package/dist/src/analytics/schema.d.ts +1 -1
- package/dist/src/analytics/schema.d.ts.map +1 -1
- package/dist/src/analytics/schema.js +68 -0
- package/dist/src/analytics/schema.js.map +1 -1
- package/dist/src/api/client.d.ts +33 -29
- package/dist/src/api/client.d.ts.map +1 -1
- package/dist/src/api/client.js +15 -10
- package/dist/src/api/client.js.map +1 -1
- package/dist/src/billing/BillingService.d.ts +139 -0
- package/dist/src/billing/BillingService.d.ts.map +1 -0
- package/dist/src/billing/BillingService.js +393 -0
- package/dist/src/billing/BillingService.js.map +1 -0
- package/dist/src/billing/GDPRComplianceService.d.ts +176 -0
- package/dist/src/billing/GDPRComplianceService.d.ts.map +1 -0
- package/dist/src/billing/GDPRComplianceService.js +361 -0
- package/dist/src/billing/GDPRComplianceService.js.map +1 -0
- package/dist/src/billing/StripeClient.d.ts +177 -0
- package/dist/src/billing/StripeClient.d.ts.map +1 -0
- package/dist/src/billing/StripeClient.js +462 -0
- package/dist/src/billing/StripeClient.js.map +1 -0
- package/dist/src/billing/StripeReconciliationJob.d.ts +95 -0
- package/dist/src/billing/StripeReconciliationJob.d.ts.map +1 -0
- package/dist/src/billing/StripeReconciliationJob.js +405 -0
- package/dist/src/billing/StripeReconciliationJob.js.map +1 -0
- package/dist/src/billing/StripeWebhookHandler.d.ts +92 -0
- package/dist/src/billing/StripeWebhookHandler.d.ts.map +1 -0
- package/dist/src/billing/StripeWebhookHandler.js +409 -0
- package/dist/src/billing/StripeWebhookHandler.js.map +1 -0
- package/dist/src/billing/index.d.ts +18 -0
- package/dist/src/billing/index.d.ts.map +1 -0
- package/dist/src/billing/index.js +19 -0
- package/dist/src/billing/index.js.map +1 -0
- package/dist/src/billing/types.d.ts +266 -0
- package/dist/src/billing/types.d.ts.map +1 -0
- package/dist/src/billing/types.js +23 -0
- package/dist/src/billing/types.js.map +1 -0
- package/dist/src/embeddings/hnsw-store.d.ts +568 -0
- package/dist/src/embeddings/hnsw-store.d.ts.map +1 -0
- package/dist/src/embeddings/hnsw-store.js +805 -0
- package/dist/src/embeddings/hnsw-store.js.map +1 -0
- package/dist/src/embeddings/index.d.ts +2 -0
- package/dist/src/embeddings/index.d.ts.map +1 -1
- package/dist/src/embeddings/index.js +2 -0
- package/dist/src/embeddings/index.js.map +1 -1
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +2 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/learning/PatternStore.d.ts +457 -0
- package/dist/src/learning/PatternStore.d.ts.map +1 -0
- package/dist/src/learning/PatternStore.js +893 -0
- package/dist/src/learning/PatternStore.js.map +1 -0
- package/dist/src/learning/ReasoningBankIntegration.d.ts +403 -0
- package/dist/src/learning/ReasoningBankIntegration.d.ts.map +1 -0
- package/dist/src/learning/ReasoningBankIntegration.js +627 -0
- package/dist/src/learning/ReasoningBankIntegration.js.map +1 -0
- package/dist/src/learning/index.d.ts +15 -0
- package/dist/src/learning/index.d.ts.map +1 -0
- package/dist/src/learning/index.js +15 -0
- package/dist/src/learning/index.js.map +1 -0
- package/dist/src/routing/SONARouter.d.ts +154 -0
- package/dist/src/routing/SONARouter.d.ts.map +1 -0
- package/dist/src/routing/SONARouter.js +679 -0
- package/dist/src/routing/SONARouter.js.map +1 -0
- package/dist/src/routing/index.d.ts +9 -0
- package/dist/src/routing/index.d.ts.map +1 -0
- package/dist/src/routing/index.js +10 -0
- package/dist/src/routing/index.js.map +1 -0
- package/dist/src/routing/types.d.ts +331 -0
- package/dist/src/routing/types.d.ts.map +1 -0
- package/dist/src/routing/types.js +203 -0
- package/dist/src/routing/types.js.map +1 -0
- package/dist/src/scripts/__tests__/scan-imported-skills.test.js +5 -0
- package/dist/src/scripts/__tests__/scan-imported-skills.test.js.map +1 -1
- package/dist/src/security/SkillSandbox.d.ts +156 -0
- package/dist/src/security/SkillSandbox.d.ts.map +1 -0
- package/dist/src/security/SkillSandbox.js +303 -0
- package/dist/src/security/SkillSandbox.js.map +1 -0
- package/dist/src/security/index.d.ts +3 -1
- package/dist/src/security/index.d.ts.map +1 -1
- package/dist/src/security/index.js +5 -1
- package/dist/src/security/index.js.map +1 -1
- package/dist/src/security/rate-limiter/presets.d.ts +12 -0
- package/dist/src/security/rate-limiter/presets.d.ts.map +1 -1
- package/dist/src/security/rate-limiter/presets.js +12 -0
- package/dist/src/security/rate-limiter/presets.js.map +1 -1
- package/dist/src/security/sanitization.d.ts +85 -0
- package/dist/src/security/sanitization.d.ts.map +1 -1
- package/dist/src/security/sanitization.js +133 -0
- package/dist/src/security/sanitization.js.map +1 -1
- package/dist/src/security/scanner/SecurityScanner.d.ts +23 -0
- package/dist/src/security/scanner/SecurityScanner.d.ts.map +1 -1
- package/dist/src/security/scanner/SecurityScanner.js +232 -28
- package/dist/src/security/scanner/SecurityScanner.js.map +1 -1
- package/dist/src/security/scanner/patterns.d.ts +13 -0
- package/dist/src/security/scanner/patterns.d.ts.map +1 -1
- package/dist/src/security/scanner/patterns.js +51 -0
- package/dist/src/security/scanner/patterns.js.map +1 -1
- package/dist/src/security/scanner/types.d.ts +13 -1
- package/dist/src/security/scanner/types.d.ts.map +1 -1
- package/dist/src/security/scanner/weights.d.ts.map +1 -1
- package/dist/src/security/scanner/weights.js +1 -0
- package/dist/src/security/scanner/weights.js.map +1 -1
- package/dist/src/session/SessionManager.d.ts +7 -0
- package/dist/src/session/SessionManager.d.ts.map +1 -1
- package/dist/src/session/SessionManager.js +117 -10
- package/dist/src/session/SessionManager.js.map +1 -1
- package/dist/src/sync/SyncEngine.d.ts.map +1 -1
- package/dist/src/sync/SyncEngine.js +52 -32
- package/dist/src/sync/SyncEngine.js.map +1 -1
- package/dist/src/testing/MultiLLMProvider.d.ts +374 -0
- package/dist/src/testing/MultiLLMProvider.d.ts.map +1 -0
- package/dist/src/testing/MultiLLMProvider.js +720 -0
- package/dist/src/testing/MultiLLMProvider.js.map +1 -0
- package/dist/src/testing/index.d.ts +8 -0
- package/dist/src/testing/index.d.ts.map +1 -0
- package/dist/src/testing/index.js +9 -0
- package/dist/src/testing/index.js.map +1 -0
- package/dist/src/types.d.ts +3 -0
- package/dist/src/types.d.ts.map +1 -1
- package/dist/tests/SecurityScanner.test.js +337 -1
- package/dist/tests/SecurityScanner.test.js.map +1 -1
- package/dist/tests/billing/BillingService.test.d.ts +7 -0
- package/dist/tests/billing/BillingService.test.d.ts.map +1 -0
- package/dist/tests/billing/BillingService.test.js +168 -0
- package/dist/tests/billing/BillingService.test.js.map +1 -0
- package/dist/tests/billing/GDPRCompliance.test.d.ts +7 -0
- package/dist/tests/billing/GDPRCompliance.test.d.ts.map +1 -0
- package/dist/tests/billing/GDPRCompliance.test.js +195 -0
- package/dist/tests/billing/GDPRCompliance.test.js.map +1 -0
- package/dist/tests/billing/StripeReconciliation.test.d.ts +7 -0
- package/dist/tests/billing/StripeReconciliation.test.d.ts.map +1 -0
- package/dist/tests/billing/StripeReconciliation.test.js +266 -0
- package/dist/tests/billing/StripeReconciliation.test.js.map +1 -0
- package/dist/tests/billing/stripe-validators.test.d.ts +7 -0
- package/dist/tests/billing/stripe-validators.test.d.ts.map +1 -0
- package/dist/tests/billing/stripe-validators.test.js +107 -0
- package/dist/tests/billing/stripe-validators.test.js.map +1 -0
- package/dist/tests/embeddings/hnsw-store.test.d.ts +7 -0
- package/dist/tests/embeddings/hnsw-store.test.d.ts.map +1 -0
- package/dist/tests/embeddings/hnsw-store.test.js +295 -0
- package/dist/tests/embeddings/hnsw-store.test.js.map +1 -0
- package/dist/tests/integration/neural/e2e-learning.test.d.ts +17 -0
- package/dist/tests/integration/neural/e2e-learning.test.d.ts.map +1 -0
- package/dist/tests/integration/neural/e2e-learning.test.js +238 -0
- package/dist/tests/integration/neural/e2e-learning.test.js.map +1 -0
- package/dist/tests/integration/neural/helpers.d.ts +132 -0
- package/dist/tests/integration/neural/helpers.d.ts.map +1 -0
- package/dist/tests/integration/neural/helpers.js +287 -0
- package/dist/tests/integration/neural/helpers.js.map +1 -0
- package/dist/tests/integration/neural/personalization.test.d.ts +21 -0
- package/dist/tests/integration/neural/personalization.test.d.ts.map +1 -0
- package/dist/tests/integration/neural/personalization.test.js +304 -0
- package/dist/tests/integration/neural/personalization.test.js.map +1 -0
- package/dist/tests/integration/neural/preference-learner.test.d.ts +23 -0
- package/dist/tests/integration/neural/preference-learner.test.d.ts.map +1 -0
- package/dist/tests/integration/neural/preference-learner.test.js +289 -0
- package/dist/tests/integration/neural/preference-learner.test.js.map +1 -0
- package/dist/tests/integration/neural/privacy.test.d.ts +19 -0
- package/dist/tests/integration/neural/privacy.test.d.ts.map +1 -0
- package/dist/tests/integration/neural/privacy.test.js +249 -0
- package/dist/tests/integration/neural/privacy.test.js.map +1 -0
- package/dist/tests/integration/neural/setup.d.ts +175 -0
- package/dist/tests/integration/neural/setup.d.ts.map +1 -0
- package/dist/tests/integration/neural/setup.js +487 -0
- package/dist/tests/integration/neural/setup.js.map +1 -0
- package/dist/tests/integration/neural/signal-collection.test.d.ts +21 -0
- package/dist/tests/integration/neural/signal-collection.test.d.ts.map +1 -0
- package/dist/tests/integration/neural/signal-collection.test.js +232 -0
- package/dist/tests/integration/neural/signal-collection.test.js.map +1 -0
- package/dist/tests/learning/PatternStore.test.d.ts +8 -0
- package/dist/tests/learning/PatternStore.test.d.ts.map +1 -0
- package/dist/tests/learning/PatternStore.test.js +589 -0
- package/dist/tests/learning/PatternStore.test.js.map +1 -0
- package/dist/tests/learning/ReasoningBankIntegration.test.d.ts +8 -0
- package/dist/tests/learning/ReasoningBankIntegration.test.d.ts.map +1 -0
- package/dist/tests/learning/ReasoningBankIntegration.test.js +269 -0
- package/dist/tests/learning/ReasoningBankIntegration.test.js.map +1 -0
- package/dist/tests/routing/SONARouter.test.d.ts +8 -0
- package/dist/tests/routing/SONARouter.test.d.ts.map +1 -0
- package/dist/tests/routing/SONARouter.test.js +400 -0
- package/dist/tests/routing/SONARouter.test.js.map +1 -0
- package/dist/tests/security/ContinuousSecurity.test.js +10 -12
- package/dist/tests/security/ContinuousSecurity.test.js.map +1 -1
- package/dist/tests/security/SkillSandbox.test.d.ts +8 -0
- package/dist/tests/security/SkillSandbox.test.d.ts.map +1 -0
- package/dist/tests/security/SkillSandbox.test.js +321 -0
- package/dist/tests/security/SkillSandbox.test.js.map +1 -0
- package/dist/tests/sync/SyncEngine.test.js +4 -2
- package/dist/tests/sync/SyncEngine.test.js.map +1 -1
- package/dist/tests/testing/MultiLLMProvider.test.d.ts +14 -0
- package/dist/tests/testing/MultiLLMProvider.test.d.ts.map +1 -0
- package/dist/tests/testing/MultiLLMProvider.test.js +438 -0
- package/dist/tests/testing/MultiLLMProvider.test.js.map +1 -0
- package/package.json +16 -3
|
@@ -0,0 +1,805 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SMI-1519: HNSW Embedding Store
|
|
3
|
+
*
|
|
4
|
+
* High-performance vector storage using HNSW (Hierarchical Navigable Small World)
|
|
5
|
+
* index for fast approximate nearest neighbor (ANN) search.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - O(log n) similarity search vs O(n) brute-force (150x faster)
|
|
9
|
+
* - SQLite for metadata persistence (skill_id, text, created_at)
|
|
10
|
+
* - Graceful fallback to brute-force if HNSW unavailable
|
|
11
|
+
* - Compatible with existing EmbeddingService interface
|
|
12
|
+
* - Uses claude-flow V3 VectorDB API with automatic fallback
|
|
13
|
+
*
|
|
14
|
+
* Enable via environment variable: SKILLSMITH_USE_HNSW=true
|
|
15
|
+
*
|
|
16
|
+
* @see ADR-009: Embedding Service Fallback Strategy
|
|
17
|
+
*/
|
|
18
|
+
import Database from 'better-sqlite3';
|
|
19
|
+
// ============================================================================
|
|
20
|
+
// Default Configuration
|
|
21
|
+
// ============================================================================
|
|
22
|
+
/**
 * Baseline HNSW settings used when the caller supplies no overrides.
 * Aimed at roughly 10k-100k skill embeddings with a balance between
 * search speed and recall.
 */
export const DEFAULT_HNSW_CONFIG = {
    m: 16,
    efConstruction: 200,
    efSearch: 100,
    // 384 matches the all-MiniLM-L6-v2 embedding width.
    dimensions: 384,
};
|
|
32
|
+
/**
 * Ready-made HNSW configurations keyed by expected dataset size.
 * Every preset uses 384-dimensional vectors; only m, efConstruction and
 * efSearch vary between them.
 */
export const HNSW_PRESETS = {
    /** Fast search, lower memory, suitable for <10k vectors */
    small: { m: 8, efConstruction: 100, efSearch: 50, dimensions: 384 },
    /** Balanced performance, suitable for 10k-100k vectors */
    medium: { m: 16, efConstruction: 200, efSearch: 100, dimensions: 384 },
    /** High recall, suitable for 100k-1M vectors */
    large: { m: 32, efConstruction: 400, efSearch: 150, dimensions: 384 },
    /** Maximum recall, suitable for >1M vectors or critical accuracy */
    xlarge: { m: 48, efConstruction: 500, efSearch: 200, dimensions: 384 },
};
|
|
65
|
+
// ============================================================================
|
|
66
|
+
// Class Skeleton
|
|
67
|
+
// ============================================================================
|
|
68
|
+
/**
|
|
69
|
+
* High-performance embedding storage using HNSW index.
|
|
70
|
+
*
|
|
71
|
+
* Provides O(log n) approximate nearest neighbor search while maintaining
|
|
72
|
+
* compatibility with the existing EmbeddingService interface.
|
|
73
|
+
*
|
|
74
|
+
* @example
|
|
75
|
+
* ```typescript
|
|
76
|
+
* // Basic usage
|
|
77
|
+
* const store = new HNSWEmbeddingStore({
|
|
78
|
+
* dbPath: './embeddings.db',
|
|
79
|
+
* indexPath: './embeddings.hnsw',
|
|
80
|
+
* });
|
|
81
|
+
*
|
|
82
|
+
* // Store embeddings
|
|
83
|
+
* store.storeEmbedding('skill-1', embedding1, 'Jest testing framework helper');
|
|
84
|
+
* store.storeEmbedding('skill-2', embedding2, 'Vitest testing utilities');
|
|
85
|
+
*
|
|
86
|
+
* // Find similar
|
|
87
|
+
* const results = store.findSimilar(queryEmbedding, 10);
|
|
88
|
+
* // [{ skillId: 'skill-1', score: 0.95 }, { skillId: 'skill-2', score: 0.87 }, ...]
|
|
89
|
+
*
|
|
90
|
+
* // Clean up
|
|
91
|
+
* store.close();
|
|
92
|
+
* ```
|
|
93
|
+
*
|
|
94
|
+
* @example
|
|
95
|
+
* ```typescript
|
|
96
|
+
* // With custom HNSW config for large dataset
|
|
97
|
+
* const store = new HNSWEmbeddingStore({
|
|
98
|
+
* dbPath: './embeddings.db',
|
|
99
|
+
* indexPath: './embeddings.hnsw',
|
|
100
|
+
* hnswConfig: HNSW_PRESETS.large,
|
|
101
|
+
* maxElements: 500000,
|
|
102
|
+
* });
|
|
103
|
+
* ```
|
|
104
|
+
*/
|
|
105
|
+
export class HNSWEmbeddingStore {
|
|
106
|
+
// -------------------------------------------------------------------------
|
|
107
|
+
// Private Fields
|
|
108
|
+
// -------------------------------------------------------------------------
|
|
109
|
+
/**
 * SQLite database for metadata and embedding blobs.
 * Starts as null; the constructor only calls initDatabase() when a dbPath
 * option is given, and close() resets this field to null.
 */
db = null;
/**
 * HNSW index instance (from hnswlib-node).
 * NOTE(review): in the portion of the file reviewed, inserts and searches go
 * through `vectorDB` and nothing assigns `index` - confirm whether this
 * field is still populated anywhere.
 */
index = null;
/** Whether HNSW is enabled (false = brute-force fallback); set by the constructor from shouldUseHNSW(options.useHNSW). */
hnswEnabled;
/** Merged HNSW configuration: DEFAULT_HNSW_CONFIG overlaid with options.hnswConfig. */
config;
/** Maximum index capacity (constructor default: 100000). */
maxElements;
/** Path to HNSW index file; undefined when options.indexPath is not supplied. */
indexPath;
/** Distance metric (constructor default: 'cosine'). */
distanceMetric;
/** Auto-save flag (constructor default: false). */
autoSave;
/** Map of skill IDs to internal HNSW labels (for reverse lookup) */
skillIdToLabel = new Map();
/** Map of HNSW labels to skill IDs */
labelToSkillId = new Map();
/** Next available label for HNSW insertion */
nextLabel = 0;
|
|
131
|
+
// -------------------------------------------------------------------------
|
|
132
|
+
// Constructor
|
|
133
|
+
// -------------------------------------------------------------------------
|
|
134
|
+
/**
|
|
135
|
+
* Create a new HNSWEmbeddingStore instance.
|
|
136
|
+
*
|
|
137
|
+
* @param options - Configuration options
|
|
138
|
+
*
|
|
139
|
+
* @example
|
|
140
|
+
* ```typescript
|
|
141
|
+
* // Default configuration (auto-detects HNSW availability)
|
|
142
|
+
* const store = new HNSWEmbeddingStore();
|
|
143
|
+
*
|
|
144
|
+
* // With persistence
|
|
145
|
+
* const store = new HNSWEmbeddingStore({
|
|
146
|
+
* dbPath: './embeddings.db',
|
|
147
|
+
* indexPath: './embeddings.hnsw',
|
|
148
|
+
* });
|
|
149
|
+
*
|
|
150
|
+
* // Force brute-force fallback
|
|
151
|
+
* const store = new HNSWEmbeddingStore({ useHNSW: false });
|
|
152
|
+
* ```
|
|
153
|
+
*/
|
|
154
|
+
/** V3 VectorDB instance (if initialized) */
|
|
155
|
+
vectorDB = null;
|
|
156
|
+
/** Promise for async initialization */
|
|
157
|
+
initPromise = null;
|
|
158
|
+
constructor(options = {}) {
|
|
159
|
+
// Determine HNSW mode from options or environment
|
|
160
|
+
this.hnswEnabled = this.shouldUseHNSW(options.useHNSW);
|
|
161
|
+
// Merge configuration with defaults
|
|
162
|
+
this.config = {
|
|
163
|
+
...DEFAULT_HNSW_CONFIG,
|
|
164
|
+
...options.hnswConfig,
|
|
165
|
+
};
|
|
166
|
+
this.maxElements = options.maxElements ?? 100000;
|
|
167
|
+
this.indexPath = options.indexPath;
|
|
168
|
+
this.distanceMetric = options.distanceMetric ?? 'cosine';
|
|
169
|
+
this.autoSave = options.autoSave ?? false;
|
|
170
|
+
// Initialize SQLite database
|
|
171
|
+
if (options.dbPath) {
|
|
172
|
+
this.initDatabase(options.dbPath);
|
|
173
|
+
}
|
|
174
|
+
// Initialize HNSW index asynchronously (if enabled)
|
|
175
|
+
if (this.hnswEnabled) {
|
|
176
|
+
this.initPromise = this.initHNSWIndex();
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
/**
|
|
180
|
+
* Ensure the store is fully initialized.
|
|
181
|
+
* Call this before operations that require the HNSW index.
|
|
182
|
+
*/
|
|
183
|
+
async ensureInitialized() {
|
|
184
|
+
if (this.initPromise) {
|
|
185
|
+
await this.initPromise;
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
// -------------------------------------------------------------------------
|
|
189
|
+
// Public Methods (IEmbeddingStore Interface)
|
|
190
|
+
// -------------------------------------------------------------------------
|
|
191
|
+
/**
|
|
192
|
+
* Store an embedding with its metadata.
|
|
193
|
+
*
|
|
194
|
+
* Inserts the vector into both HNSW index (for fast search) and
|
|
195
|
+
* SQLite (for metadata persistence).
|
|
196
|
+
*
|
|
197
|
+
* @param skillId - Unique identifier for the skill
|
|
198
|
+
* @param embedding - Vector embedding (must match configured dimensions)
|
|
199
|
+
* @param text - Original text that was embedded
|
|
200
|
+
* @throws Error if embedding dimensions don't match configuration
|
|
201
|
+
*/
|
|
202
|
+
storeEmbedding(skillId, embedding, text) {
|
|
203
|
+
// Validate embedding dimensions
|
|
204
|
+
if (embedding.length !== this.config.dimensions) {
|
|
205
|
+
throw new Error(`Embedding dimension mismatch: got ${embedding.length}, expected ${this.config.dimensions}`);
|
|
206
|
+
}
|
|
207
|
+
// Store in SQLite (metadata + embedding blob for brute-force fallback)
|
|
208
|
+
if (this.db) {
|
|
209
|
+
const buffer = Buffer.from(embedding.buffer);
|
|
210
|
+
const stmt = this.db.prepare(`
|
|
211
|
+
INSERT OR REPLACE INTO skill_embeddings (skill_id, embedding, text, created_at)
|
|
212
|
+
VALUES (?, ?, ?, unixepoch())
|
|
213
|
+
`);
|
|
214
|
+
stmt.run(skillId, buffer, text);
|
|
215
|
+
}
|
|
216
|
+
// Insert into V3 VectorDB (HNSW index)
|
|
217
|
+
if (this.vectorDB) {
|
|
218
|
+
// VectorDB.insert may be sync or async depending on backend
|
|
219
|
+
const result = this.vectorDB.insert(embedding, skillId, { text });
|
|
220
|
+
if (result instanceof Promise) {
|
|
221
|
+
// Fire and forget for sync interface, but log errors
|
|
222
|
+
result.catch((err) => {
|
|
223
|
+
console.warn(`[HNSWEmbeddingStore] Failed to insert into VectorDB: ${err}`);
|
|
224
|
+
});
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
/**
|
|
229
|
+
* Retrieve a stored embedding by skill ID.
|
|
230
|
+
*
|
|
231
|
+
* @param skillId - Unique identifier for the skill
|
|
232
|
+
* @returns The embedding if found, null otherwise
|
|
233
|
+
*/
|
|
234
|
+
getEmbedding(skillId) {
|
|
235
|
+
if (!this.db)
|
|
236
|
+
return null;
|
|
237
|
+
const stmt = this.db.prepare(`
|
|
238
|
+
SELECT embedding FROM skill_embeddings WHERE skill_id = ?
|
|
239
|
+
`);
|
|
240
|
+
const row = stmt.get(skillId);
|
|
241
|
+
if (!row)
|
|
242
|
+
return null;
|
|
243
|
+
return new Float32Array(row.embedding.buffer.slice(row.embedding.byteOffset, row.embedding.byteOffset + row.embedding.byteLength));
|
|
244
|
+
}
|
|
245
|
+
/**
|
|
246
|
+
* Get all stored embeddings.
|
|
247
|
+
*
|
|
248
|
+
* Note: For large datasets, consider using findSimilar() instead
|
|
249
|
+
* to avoid loading all vectors into memory.
|
|
250
|
+
*
|
|
251
|
+
* @returns Map of skill IDs to their embeddings
|
|
252
|
+
*/
|
|
253
|
+
getAllEmbeddings() {
|
|
254
|
+
if (!this.db)
|
|
255
|
+
return new Map();
|
|
256
|
+
const stmt = this.db.prepare(`
|
|
257
|
+
SELECT skill_id, embedding FROM skill_embeddings
|
|
258
|
+
`);
|
|
259
|
+
const rows = stmt.all();
|
|
260
|
+
const result = new Map();
|
|
261
|
+
for (const row of rows) {
|
|
262
|
+
const embedding = new Float32Array(row.embedding.buffer.slice(row.embedding.byteOffset, row.embedding.byteOffset + row.embedding.byteLength));
|
|
263
|
+
result.set(row.skill_id, embedding);
|
|
264
|
+
}
|
|
265
|
+
return result;
|
|
266
|
+
}
|
|
267
|
+
/**
|
|
268
|
+
* Find most similar embeddings to a query vector.
|
|
269
|
+
*
|
|
270
|
+
* Uses HNSW for O(log n) approximate search when available,
|
|
271
|
+
* falls back to O(n) brute-force cosine similarity otherwise.
|
|
272
|
+
*
|
|
273
|
+
* @param queryEmbedding - Query vector (must match configured dimensions)
|
|
274
|
+
* @param topK - Number of results to return (default: 10)
|
|
275
|
+
* @returns Array of skill IDs with similarity scores, sorted descending
|
|
276
|
+
*/
|
|
277
|
+
findSimilar(queryEmbedding, topK = 10) {
|
|
278
|
+
// Validate query dimensions
|
|
279
|
+
if (queryEmbedding.length !== this.config.dimensions) {
|
|
280
|
+
throw new Error(`Query dimension mismatch: got ${queryEmbedding.length}, expected ${this.config.dimensions}`);
|
|
281
|
+
}
|
|
282
|
+
// Try HNSW search first (if available)
|
|
283
|
+
if (this.vectorDB) {
|
|
284
|
+
try {
|
|
285
|
+
// VectorDB.search may be sync or async
|
|
286
|
+
const searchResult = this.vectorDB.search(queryEmbedding, topK);
|
|
287
|
+
// Handle async case by returning empty and logging
|
|
288
|
+
// (sync interface limitation - caller should use findSimilarAsync for async)
|
|
289
|
+
if (searchResult instanceof Promise) {
|
|
290
|
+
// For sync interface, fall back to brute-force
|
|
291
|
+
console.warn('[HNSWEmbeddingStore] VectorDB.search returned Promise, using brute-force fallback');
|
|
292
|
+
}
|
|
293
|
+
else {
|
|
294
|
+
// Convert VectorDB results to SimilarityResult format
|
|
295
|
+
return searchResult.map((result) => ({
|
|
296
|
+
skillId: result.id,
|
|
297
|
+
score: result.score,
|
|
298
|
+
}));
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
catch (err) {
|
|
302
|
+
console.warn(`[HNSWEmbeddingStore] HNSW search failed, falling back to brute-force: ${err}`);
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
// Brute-force fallback: compute cosine similarity for all embeddings
|
|
306
|
+
const allEmbeddings = this.getAllEmbeddings();
|
|
307
|
+
const results = [];
|
|
308
|
+
for (const [skillId, embedding] of allEmbeddings) {
|
|
309
|
+
const score = this.cosineSimilarity(queryEmbedding, embedding);
|
|
310
|
+
results.push({ skillId, score });
|
|
311
|
+
}
|
|
312
|
+
// Sort by similarity score descending and return topK
|
|
313
|
+
results.sort((a, b) => b.score - a.score);
|
|
314
|
+
return results.slice(0, topK);
|
|
315
|
+
}
|
|
316
|
+
/**
|
|
317
|
+
* Async version of findSimilar for backends that require async search.
|
|
318
|
+
*
|
|
319
|
+
* @param queryEmbedding - Query vector (must match configured dimensions)
|
|
320
|
+
* @param topK - Number of results to return (default: 10)
|
|
321
|
+
* @returns Promise resolving to array of skill IDs with similarity scores
|
|
322
|
+
*/
|
|
323
|
+
async findSimilarAsync(queryEmbedding, topK = 10) {
|
|
324
|
+
// Ensure HNSW is initialized
|
|
325
|
+
await this.ensureInitialized();
|
|
326
|
+
// Validate query dimensions
|
|
327
|
+
if (queryEmbedding.length !== this.config.dimensions) {
|
|
328
|
+
throw new Error(`Query dimension mismatch: got ${queryEmbedding.length}, expected ${this.config.dimensions}`);
|
|
329
|
+
}
|
|
330
|
+
// Try HNSW search first (if available)
|
|
331
|
+
if (this.vectorDB) {
|
|
332
|
+
try {
|
|
333
|
+
const searchResult = this.vectorDB.search(queryEmbedding, topK);
|
|
334
|
+
const results = searchResult instanceof Promise ? await searchResult : searchResult;
|
|
335
|
+
return results.map((result) => ({
|
|
336
|
+
skillId: result.id,
|
|
337
|
+
score: result.score,
|
|
338
|
+
}));
|
|
339
|
+
}
|
|
340
|
+
catch (err) {
|
|
341
|
+
console.warn(`[HNSWEmbeddingStore] HNSW search failed, falling back to brute-force: ${err}`);
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
// Brute-force fallback
|
|
345
|
+
return this.findSimilar(queryEmbedding, topK);
|
|
346
|
+
}
|
|
347
|
+
/**
|
|
348
|
+
* Compute cosine similarity between two embeddings.
|
|
349
|
+
*
|
|
350
|
+
* @param a - First embedding
|
|
351
|
+
* @param b - Second embedding
|
|
352
|
+
* @returns Similarity score between -1 and 1
|
|
353
|
+
* @throws Error if embeddings have different dimensions
|
|
354
|
+
*/
|
|
355
|
+
cosineSimilarity(a, b) {
|
|
356
|
+
if (a.length !== b.length) {
|
|
357
|
+
throw new Error(`Embedding dimension mismatch: ${a.length} vs ${b.length}. ` +
|
|
358
|
+
`Expected ${this.config.dimensions}.`);
|
|
359
|
+
}
|
|
360
|
+
let dotProduct = 0;
|
|
361
|
+
let normA = 0;
|
|
362
|
+
let normB = 0;
|
|
363
|
+
for (let i = 0; i < a.length; i++) {
|
|
364
|
+
dotProduct += a[i] * b[i];
|
|
365
|
+
normA += a[i] * a[i];
|
|
366
|
+
normB += b[i] * b[i];
|
|
367
|
+
}
|
|
368
|
+
if (normA === 0 || normB === 0)
|
|
369
|
+
return 0;
|
|
370
|
+
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
|
371
|
+
}
|
|
372
|
+
/**
|
|
373
|
+
* Check if running in fallback (brute-force) mode.
|
|
374
|
+
*
|
|
375
|
+
* @returns true if using brute-force, false if using HNSW
|
|
376
|
+
*/
|
|
377
|
+
isUsingFallback() {
|
|
378
|
+
return !this.hnswEnabled || this.index === null;
|
|
379
|
+
}
|
|
380
|
+
/**
|
|
381
|
+
* Close database connections and release resources.
|
|
382
|
+
*
|
|
383
|
+
* Saves HNSW index to disk if indexPath was configured.
|
|
384
|
+
* Safe to call multiple times.
|
|
385
|
+
*/
|
|
386
|
+
close() {
|
|
387
|
+
// TODO: SMI-1519 - Implement
|
|
388
|
+
// 1. Save HNSW index if indexPath configured
|
|
389
|
+
// 2. Close SQLite database
|
|
390
|
+
// 3. Clear label mappings
|
|
391
|
+
if (this.db) {
|
|
392
|
+
this.db.close();
|
|
393
|
+
this.db = null;
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
// -------------------------------------------------------------------------
|
|
397
|
+
// Extended Public Methods (HNSW-specific)
|
|
398
|
+
// -------------------------------------------------------------------------
|
|
399
|
+
/**
|
|
400
|
+
* Get statistics about the HNSW index.
|
|
401
|
+
*
|
|
402
|
+
* @returns Index statistics including capacity, utilization, and config
|
|
403
|
+
*/
|
|
404
|
+
getStats() {
|
|
405
|
+
let vectorCount = 0;
|
|
406
|
+
// Get count from SQLite
|
|
407
|
+
if (this.db) {
|
|
408
|
+
const stmt = this.db.prepare('SELECT COUNT(*) as count FROM skill_embeddings');
|
|
409
|
+
const row = stmt.get();
|
|
410
|
+
vectorCount = row.count;
|
|
411
|
+
}
|
|
412
|
+
// Get count from VectorDB if available
|
|
413
|
+
if (this.vectorDB) {
|
|
414
|
+
try {
|
|
415
|
+
const size = this.vectorDB.size();
|
|
416
|
+
if (!(size instanceof Promise)) {
|
|
417
|
+
vectorCount = Math.max(vectorCount, size);
|
|
418
|
+
}
|
|
419
|
+
}
|
|
420
|
+
catch {
|
|
421
|
+
// Ignore errors, use SQLite count
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
const utilizationPercent = this.maxElements > 0 ? (vectorCount / this.maxElements) * 100 : 0;
|
|
425
|
+
// Estimate memory usage (rough approximation)
|
|
426
|
+
// HNSW uses ~(4 * dimensions + M * 4 * 2) bytes per vector
|
|
427
|
+
const bytesPerVector = 4 * this.config.dimensions + this.config.m * 8;
|
|
428
|
+
const memoryUsageBytes = vectorCount * bytesPerVector;
|
|
429
|
+
return {
|
|
430
|
+
vectorCount,
|
|
431
|
+
maxCapacity: this.maxElements,
|
|
432
|
+
utilizationPercent: Math.round(utilizationPercent * 100) / 100,
|
|
433
|
+
m: this.config.m,
|
|
434
|
+
efConstruction: this.config.efConstruction,
|
|
435
|
+
efSearch: this.config.efSearch,
|
|
436
|
+
dimensions: this.config.dimensions,
|
|
437
|
+
memoryUsageBytes,
|
|
438
|
+
isHNSWEnabled: this.vectorDB !== null,
|
|
439
|
+
indexPath: this.indexPath,
|
|
440
|
+
};
|
|
441
|
+
}
|
|
442
|
+
/**
|
|
443
|
+
* Batch insert multiple embeddings efficiently.
|
|
444
|
+
*
|
|
445
|
+
* More efficient than calling storeEmbedding() in a loop
|
|
446
|
+
* due to batched SQLite transactions and HNSW insertions.
|
|
447
|
+
*
|
|
448
|
+
* @param embeddings - Array of embeddings to insert
|
|
449
|
+
* @returns Batch operation result with counts and timing
|
|
450
|
+
*/
|
|
451
|
+
batchInsert(embeddings) {
|
|
452
|
+
const startTime = Date.now();
|
|
453
|
+
const result = {
|
|
454
|
+
inserted: 0,
|
|
455
|
+
updated: 0,
|
|
456
|
+
failed: 0,
|
|
457
|
+
errors: [],
|
|
458
|
+
durationMs: 0,
|
|
459
|
+
};
|
|
460
|
+
if (!this.db) {
|
|
461
|
+
result.errors.push({ skillId: '*', error: 'Database not initialized' });
|
|
462
|
+
result.durationMs = Date.now() - startTime;
|
|
463
|
+
return result;
|
|
464
|
+
}
|
|
465
|
+
// Use a transaction for batch SQLite operations
|
|
466
|
+
const insertStmt = this.db.prepare(`
|
|
467
|
+
INSERT OR REPLACE INTO skill_embeddings (skill_id, embedding, text, created_at)
|
|
468
|
+
VALUES (?, ?, ?, unixepoch())
|
|
469
|
+
`);
|
|
470
|
+
const checkStmt = this.db.prepare(`
|
|
471
|
+
SELECT 1 FROM skill_embeddings WHERE skill_id = ?
|
|
472
|
+
`);
|
|
473
|
+
const transaction = this.db.transaction(() => {
|
|
474
|
+
for (const { skillId, embedding, text } of embeddings) {
|
|
475
|
+
try {
|
|
476
|
+
// Validate dimensions
|
|
477
|
+
if (embedding.length !== this.config.dimensions) {
|
|
478
|
+
result.failed++;
|
|
479
|
+
result.errors.push({
|
|
480
|
+
skillId,
|
|
481
|
+
error: `Dimension mismatch: got ${embedding.length}, expected ${this.config.dimensions}`,
|
|
482
|
+
});
|
|
483
|
+
continue;
|
|
484
|
+
}
|
|
485
|
+
// Check if exists (for updated count)
|
|
486
|
+
const exists = checkStmt.get(skillId);
|
|
487
|
+
// Insert into SQLite
|
|
488
|
+
const buffer = Buffer.from(embedding.buffer);
|
|
489
|
+
insertStmt.run(skillId, buffer, text);
|
|
490
|
+
// Insert into VectorDB
|
|
491
|
+
if (this.vectorDB) {
|
|
492
|
+
try {
|
|
493
|
+
this.vectorDB.insert(embedding, skillId, { text });
|
|
494
|
+
}
|
|
495
|
+
catch (err) {
|
|
496
|
+
// Log but don't fail - SQLite is the source of truth
|
|
497
|
+
console.warn(`[HNSWEmbeddingStore] VectorDB insert failed for ${skillId}: ${err}`);
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
if (exists) {
|
|
501
|
+
result.updated++;
|
|
502
|
+
}
|
|
503
|
+
else {
|
|
504
|
+
result.inserted++;
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
catch (err) {
|
|
508
|
+
result.failed++;
|
|
509
|
+
result.errors.push({
|
|
510
|
+
skillId,
|
|
511
|
+
error: err instanceof Error ? err.message : String(err),
|
|
512
|
+
});
|
|
513
|
+
}
|
|
514
|
+
}
|
|
515
|
+
});
|
|
516
|
+
transaction();
|
|
517
|
+
result.durationMs = Date.now() - startTime;
|
|
518
|
+
return result;
|
|
519
|
+
}
|
|
520
|
+
/**
|
|
521
|
+
* Remove an embedding from the store.
|
|
522
|
+
*
|
|
523
|
+
* Note: HNSW does not support true deletion. The vector is marked
|
|
524
|
+
* as deleted and excluded from search results, but memory is not
|
|
525
|
+
* reclaimed until the index is rebuilt.
|
|
526
|
+
*
|
|
527
|
+
* @param skillId - Unique identifier for the skill to remove
|
|
528
|
+
* @returns true if removed, false if not found
|
|
529
|
+
*/
|
|
530
|
+
removeEmbedding(skillId) {
|
|
531
|
+
let removed = false;
|
|
532
|
+
// Remove from SQLite
|
|
533
|
+
if (this.db) {
|
|
534
|
+
const stmt = this.db.prepare('DELETE FROM skill_embeddings WHERE skill_id = ?');
|
|
535
|
+
const result = stmt.run(skillId);
|
|
536
|
+
removed = result.changes > 0;
|
|
537
|
+
}
|
|
538
|
+
// Remove from VectorDB (if supported)
|
|
539
|
+
if (this.vectorDB && removed) {
|
|
540
|
+
try {
|
|
541
|
+
const vdbResult = this.vectorDB.remove(skillId);
|
|
542
|
+
// VectorDB.remove may be sync or async
|
|
543
|
+
if (vdbResult instanceof Promise) {
|
|
544
|
+
vdbResult.catch((err) => {
|
|
545
|
+
console.warn(`[HNSWEmbeddingStore] VectorDB remove failed for ${skillId}: ${err}`);
|
|
546
|
+
});
|
|
547
|
+
}
|
|
548
|
+
}
|
|
549
|
+
catch (err) {
|
|
550
|
+
// Log but don't fail - SQLite is the source of truth
|
|
551
|
+
console.warn(`[HNSWEmbeddingStore] VectorDB remove failed for ${skillId}: ${err}`);
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
return removed;
|
|
555
|
+
}
|
|
556
|
+
/**
|
|
557
|
+
* Save the HNSW index to disk.
|
|
558
|
+
*
|
|
559
|
+
* Note: V3 VectorDB manages its own persistence, so this is a no-op
|
|
560
|
+
* unless using hnswlib-node directly.
|
|
561
|
+
*
|
|
562
|
+
* @throws Error if indexPath was not configured
|
|
563
|
+
*/
|
|
564
|
+
saveIndex() {
|
|
565
|
+
if (!this.indexPath) {
|
|
566
|
+
throw new Error('Cannot save index: indexPath not configured');
|
|
567
|
+
}
|
|
568
|
+
// V3 VectorDB handles its own persistence
|
|
569
|
+
// For hnswlib-node, we would call index.saveIndex(this.indexPath)
|
|
570
|
+
console.log(`[HNSWEmbeddingStore] Index persistence managed by V3 VectorDB backend`);
|
|
571
|
+
}
|
|
572
|
+
/**
|
|
573
|
+
* Load the HNSW index from disk.
|
|
574
|
+
*
|
|
575
|
+
* Note: V3 VectorDB manages its own persistence, so this is a no-op
|
|
576
|
+
* unless using hnswlib-node directly.
|
|
577
|
+
*
|
|
578
|
+
* @throws Error if indexPath was not configured or file doesn't exist
|
|
579
|
+
*/
|
|
580
|
+
loadIndex() {
|
|
581
|
+
if (!this.indexPath) {
|
|
582
|
+
throw new Error('Cannot load index: indexPath not configured');
|
|
583
|
+
}
|
|
584
|
+
// V3 VectorDB handles its own persistence
|
|
585
|
+
// For hnswlib-node, we would call index.loadIndex(this.indexPath)
|
|
586
|
+
console.log(`[HNSWEmbeddingStore] Index persistence managed by V3 VectorDB backend`);
|
|
587
|
+
}
|
|
588
|
+
/**
|
|
589
|
+
* Rebuild the HNSW index from SQLite data.
|
|
590
|
+
*
|
|
591
|
+
* Useful after many deletions to reclaim memory, or to apply
|
|
592
|
+
* new HNSW configuration parameters.
|
|
593
|
+
*
|
|
594
|
+
* @param newConfig - Optional new HNSW configuration
|
|
595
|
+
*/
|
|
596
|
+
async rebuildIndex(newConfig) {
|
|
597
|
+
// Update config if provided
|
|
598
|
+
if (newConfig) {
|
|
599
|
+
Object.assign(this.config, newConfig);
|
|
600
|
+
}
|
|
601
|
+
// Clear existing VectorDB
|
|
602
|
+
if (this.vectorDB) {
|
|
603
|
+
try {
|
|
604
|
+
const clearResult = this.vectorDB.clear();
|
|
605
|
+
if (clearResult instanceof Promise) {
|
|
606
|
+
await clearResult;
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
catch (err) {
|
|
610
|
+
console.warn(`[HNSWEmbeddingStore] Failed to clear VectorDB: ${err}`);
|
|
611
|
+
}
|
|
612
|
+
}
|
|
613
|
+
// Reinitialize VectorDB
|
|
614
|
+
await this.initHNSWIndex();
|
|
615
|
+
// Re-insert all embeddings from SQLite
|
|
616
|
+
if (this.db && this.vectorDB) {
|
|
617
|
+
const allEmbeddings = this.getAllEmbeddings();
|
|
618
|
+
for (const [skillId, embedding] of allEmbeddings) {
|
|
619
|
+
try {
|
|
620
|
+
const result = this.vectorDB.insert(embedding, skillId);
|
|
621
|
+
if (result instanceof Promise) {
|
|
622
|
+
await result;
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
catch (err) {
|
|
626
|
+
console.warn(`[HNSWEmbeddingStore] Failed to reinsert ${skillId}: ${err}`);
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
/**
|
|
632
|
+
* Update efSearch parameter at runtime.
|
|
633
|
+
*
|
|
634
|
+
* Note: V3 VectorDB does not expose efSearch tuning directly.
|
|
635
|
+
* This method is provided for API compatibility.
|
|
636
|
+
*
|
|
637
|
+
* @param efSearch - New efSearch value (must be > 0)
|
|
638
|
+
*/
|
|
639
|
+
setEfSearch(efSearch) {
|
|
640
|
+
if (efSearch <= 0) {
|
|
641
|
+
throw new Error('efSearch must be > 0');
|
|
642
|
+
}
|
|
643
|
+
this.config.efSearch = efSearch;
|
|
644
|
+
// V3 VectorDB doesn't expose efSearch tuning
|
|
645
|
+
// For hnswlib-node, we would call index.setEfSearch(efSearch)
|
|
646
|
+
console.log(`[HNSWEmbeddingStore] efSearch updated to ${efSearch} (will apply on next search)`);
|
|
647
|
+
}
|
|
648
|
+
// -------------------------------------------------------------------------
|
|
649
|
+
// Private Methods
|
|
650
|
+
// -------------------------------------------------------------------------
|
|
651
|
+
/**
|
|
652
|
+
* Determine whether to use HNSW based on explicit option or environment.
|
|
653
|
+
*/
|
|
654
|
+
shouldUseHNSW(explicit) {
|
|
655
|
+
if (explicit !== undefined) {
|
|
656
|
+
return explicit;
|
|
657
|
+
}
|
|
658
|
+
// Check environment variable
|
|
659
|
+
const envValue = process.env.SKILLSMITH_USE_HNSW;
|
|
660
|
+
if (envValue !== undefined) {
|
|
661
|
+
return envValue === 'true' || envValue === '1';
|
|
662
|
+
}
|
|
663
|
+
// Default to false (use brute-force) for backward compatibility
|
|
664
|
+
// TODO: Consider changing default to true in future version
|
|
665
|
+
return false;
|
|
666
|
+
}
|
|
667
|
+
/**
|
|
668
|
+
* Initialize SQLite database and create tables.
|
|
669
|
+
*/
|
|
670
|
+
initDatabase(dbPath) {
|
|
671
|
+
this.db = new Database(dbPath);
|
|
672
|
+
// Create skill_embeddings table
|
|
673
|
+
this.db.exec(`
|
|
674
|
+
CREATE TABLE IF NOT EXISTS skill_embeddings (
|
|
675
|
+
skill_id TEXT PRIMARY KEY,
|
|
676
|
+
embedding BLOB NOT NULL,
|
|
677
|
+
text TEXT NOT NULL,
|
|
678
|
+
created_at INTEGER DEFAULT (unixepoch())
|
|
679
|
+
)
|
|
680
|
+
`);
|
|
681
|
+
// Create index for fast lookups
|
|
682
|
+
this.db.exec(`
|
|
683
|
+
CREATE INDEX IF NOT EXISTS idx_skill_embeddings_id
|
|
684
|
+
ON skill_embeddings(skill_id)
|
|
685
|
+
`);
|
|
686
|
+
}
|
|
687
|
+
/**
|
|
688
|
+
* Initialize HNSW index using V3 VectorDB API.
|
|
689
|
+
* Falls back gracefully if V3 is unavailable.
|
|
690
|
+
*/
|
|
691
|
+
async initHNSWIndex() {
|
|
692
|
+
try {
|
|
693
|
+
// Dynamically import V3 VectorDB module
|
|
694
|
+
const vectorDbModule = await import('claude-flow/v3/@claude-flow/cli/dist/src/ruvector/vector-db.js');
|
|
695
|
+
// Load ruvector backend (may use WASM acceleration)
|
|
696
|
+
const loaded = await vectorDbModule.loadRuVector();
|
|
697
|
+
if (!loaded) {
|
|
698
|
+
console.warn('[HNSWEmbeddingStore] ruvector not available, using fallback backend');
|
|
699
|
+
}
|
|
700
|
+
// Create VectorDB instance
|
|
701
|
+
this.vectorDB = await vectorDbModule.createVectorDB(this.config.dimensions);
|
|
702
|
+
// Log status
|
|
703
|
+
const status = vectorDbModule.getStatus();
|
|
704
|
+
console.log(`[HNSWEmbeddingStore] Initialized with backend: ${status.backend}` +
|
|
705
|
+
(status.wasmAccelerated ? ' (WASM accelerated)' : ''));
|
|
706
|
+
// Re-populate VectorDB from SQLite if we have existing data
|
|
707
|
+
if (this.db) {
|
|
708
|
+
const count = this.db.prepare('SELECT COUNT(*) as c FROM skill_embeddings').get();
|
|
709
|
+
if (count.c > 0) {
|
|
710
|
+
console.log(`[HNSWEmbeddingStore] Rebuilding index from ${count.c} existing embeddings...`);
|
|
711
|
+
const allEmbeddings = this.getAllEmbeddings();
|
|
712
|
+
for (const [skillId, embedding] of allEmbeddings) {
|
|
713
|
+
try {
|
|
714
|
+
const result = this.vectorDB.insert(embedding, skillId);
|
|
715
|
+
if (result instanceof Promise) {
|
|
716
|
+
await result;
|
|
717
|
+
}
|
|
718
|
+
}
|
|
719
|
+
catch (err) {
|
|
720
|
+
console.warn(`[HNSWEmbeddingStore] Failed to insert ${skillId}: ${err}`);
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
console.log(`[HNSWEmbeddingStore] Index rebuilt with ${allEmbeddings.size} vectors`);
|
|
724
|
+
}
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
catch (err) {
|
|
728
|
+
// V3 VectorDB not available - will use brute-force fallback
|
|
729
|
+
console.warn(`[HNSWEmbeddingStore] Failed to initialize V3 VectorDB, using brute-force fallback: ${err}`);
|
|
730
|
+
this.vectorDB = null;
|
|
731
|
+
}
|
|
732
|
+
}
|
|
733
|
+
/**
|
|
734
|
+
* Convert HNSW distance to similarity score.
|
|
735
|
+
* HNSW returns distances, we need similarities (higher = more similar).
|
|
736
|
+
*/
|
|
737
|
+
distanceToSimilarity(distance) {
|
|
738
|
+
// For cosine space, HNSW returns 1 - cosine_similarity
|
|
739
|
+
// So similarity = 1 - distance
|
|
740
|
+
if (this.distanceMetric === 'cosine') {
|
|
741
|
+
return 1 - distance;
|
|
742
|
+
}
|
|
743
|
+
// For L2/IP, need different conversion
|
|
744
|
+
// TODO: Implement for other metrics
|
|
745
|
+
return 1 / (1 + distance);
|
|
746
|
+
}
|
|
747
|
+
}
|
|
748
|
+
// ============================================================================
|
|
749
|
+
// Factory Functions
|
|
750
|
+
// ============================================================================
|
|
751
|
+
/**
 * Build an HNSWEmbeddingStore from a named preset.
 *
 * The preset's entry in HNSW_PRESETS becomes the store's `hnswConfig`. It is
 * applied after `options` is spread, so any `options.hnswConfig` is replaced
 * by the preset.
 *
 * @param preset - Preset name ('small', 'medium', 'large', 'xlarge')
 * @param options - Additional store options passed through to the constructor
 * @returns Configured HNSWEmbeddingStore instance
 *
 * @example
 * ```typescript
 * const store = createHNSWStore('large', {
 *   dbPath: './embeddings.db',
 *   indexPath: './embeddings.hnsw',
 * });
 * ```
 */
export function createHNSWStore(preset, options = {}) {
    const hnswConfig = HNSW_PRESETS[preset];
    const storeOptions = { ...options, hnswConfig };
    return new HNSWEmbeddingStore(storeOptions);
}
|
|
772
|
+
/**
 * Check if hnswlib-node is available.
 *
 * Useful for conditional logic or graceful degradation. Any failure while
 * importing the module is treated as "not available".
 *
 * @returns Promise resolving to true if hnswlib-node can be imported, false otherwise
 */
export async function isHNSWAvailable() {
    // The specifier is held in a variable so the import stays dynamic, without
    // generating code from a string (which is blocked when code generation
    // from strings is disallowed and would then always report false).
    const specifier = 'hnswlib-node';
    try {
        await import(specifier);
        return true;
    }
    catch {
        return false;
    }
}
|
|
789
|
+
/**
 * Dynamically load the hnswlib-node module.
 *
 * Any failure while importing the module is swallowed and reported as null.
 *
 * @returns Promise resolving to the imported hnswlib-node module namespace,
 *   or null if it cannot be imported
 * @internal
 */
export async function loadHNSWLib() {
    // The specifier is held in a variable so the import stays dynamic, without
    // generating code from a string (which is blocked when code generation
    // from strings is disallowed and would then always yield null).
    const specifier = 'hnswlib-node';
    try {
        return await import(specifier);
    }
    catch {
        return null;
    }
}
|
|
805
|
+
//# sourceMappingURL=hnsw-store.js.map
|