@skillsmith/core 2.1.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. package/dist/.tsbuildinfo +1 -1
  2. package/dist/src/analysis/types.d.ts +2 -0
  3. package/dist/src/analysis/types.d.ts.map +1 -1
  4. package/dist/src/analysis/types.js +13 -1
  5. package/dist/src/analysis/types.js.map +1 -1
  6. package/dist/src/analytics/AnalyticsRepository.d.ts +4 -0
  7. package/dist/src/analytics/AnalyticsRepository.d.ts.map +1 -1
  8. package/dist/src/analytics/AnalyticsRepository.js +26 -44
  9. package/dist/src/analytics/AnalyticsRepository.js.map +1 -1
  10. package/dist/src/analytics/schema.d.ts +1 -1
  11. package/dist/src/analytics/schema.d.ts.map +1 -1
  12. package/dist/src/analytics/schema.js +68 -0
  13. package/dist/src/analytics/schema.js.map +1 -1
  14. package/dist/src/api/client.d.ts +33 -29
  15. package/dist/src/api/client.d.ts.map +1 -1
  16. package/dist/src/api/client.js +15 -10
  17. package/dist/src/api/client.js.map +1 -1
  18. package/dist/src/billing/BillingService.d.ts +139 -0
  19. package/dist/src/billing/BillingService.d.ts.map +1 -0
  20. package/dist/src/billing/BillingService.js +393 -0
  21. package/dist/src/billing/BillingService.js.map +1 -0
  22. package/dist/src/billing/GDPRComplianceService.d.ts +176 -0
  23. package/dist/src/billing/GDPRComplianceService.d.ts.map +1 -0
  24. package/dist/src/billing/GDPRComplianceService.js +361 -0
  25. package/dist/src/billing/GDPRComplianceService.js.map +1 -0
  26. package/dist/src/billing/StripeClient.d.ts +177 -0
  27. package/dist/src/billing/StripeClient.d.ts.map +1 -0
  28. package/dist/src/billing/StripeClient.js +462 -0
  29. package/dist/src/billing/StripeClient.js.map +1 -0
  30. package/dist/src/billing/StripeReconciliationJob.d.ts +95 -0
  31. package/dist/src/billing/StripeReconciliationJob.d.ts.map +1 -0
  32. package/dist/src/billing/StripeReconciliationJob.js +405 -0
  33. package/dist/src/billing/StripeReconciliationJob.js.map +1 -0
  34. package/dist/src/billing/StripeWebhookHandler.d.ts +92 -0
  35. package/dist/src/billing/StripeWebhookHandler.d.ts.map +1 -0
  36. package/dist/src/billing/StripeWebhookHandler.js +409 -0
  37. package/dist/src/billing/StripeWebhookHandler.js.map +1 -0
  38. package/dist/src/billing/index.d.ts +18 -0
  39. package/dist/src/billing/index.d.ts.map +1 -0
  40. package/dist/src/billing/index.js +19 -0
  41. package/dist/src/billing/index.js.map +1 -0
  42. package/dist/src/billing/types.d.ts +266 -0
  43. package/dist/src/billing/types.d.ts.map +1 -0
  44. package/dist/src/billing/types.js +23 -0
  45. package/dist/src/billing/types.js.map +1 -0
  46. package/dist/src/embeddings/hnsw-store.d.ts +568 -0
  47. package/dist/src/embeddings/hnsw-store.d.ts.map +1 -0
  48. package/dist/src/embeddings/hnsw-store.js +805 -0
  49. package/dist/src/embeddings/hnsw-store.js.map +1 -0
  50. package/dist/src/embeddings/index.d.ts +2 -0
  51. package/dist/src/embeddings/index.d.ts.map +1 -1
  52. package/dist/src/embeddings/index.js +2 -0
  53. package/dist/src/embeddings/index.js.map +1 -1
  54. package/dist/src/index.d.ts +1 -0
  55. package/dist/src/index.d.ts.map +1 -1
  56. package/dist/src/index.js +2 -0
  57. package/dist/src/index.js.map +1 -1
  58. package/dist/src/learning/PatternStore.d.ts +457 -0
  59. package/dist/src/learning/PatternStore.d.ts.map +1 -0
  60. package/dist/src/learning/PatternStore.js +893 -0
  61. package/dist/src/learning/PatternStore.js.map +1 -0
  62. package/dist/src/learning/ReasoningBankIntegration.d.ts +403 -0
  63. package/dist/src/learning/ReasoningBankIntegration.d.ts.map +1 -0
  64. package/dist/src/learning/ReasoningBankIntegration.js +627 -0
  65. package/dist/src/learning/ReasoningBankIntegration.js.map +1 -0
  66. package/dist/src/learning/index.d.ts +15 -0
  67. package/dist/src/learning/index.d.ts.map +1 -0
  68. package/dist/src/learning/index.js +15 -0
  69. package/dist/src/learning/index.js.map +1 -0
  70. package/dist/src/routing/SONARouter.d.ts +154 -0
  71. package/dist/src/routing/SONARouter.d.ts.map +1 -0
  72. package/dist/src/routing/SONARouter.js +679 -0
  73. package/dist/src/routing/SONARouter.js.map +1 -0
  74. package/dist/src/routing/index.d.ts +9 -0
  75. package/dist/src/routing/index.d.ts.map +1 -0
  76. package/dist/src/routing/index.js +10 -0
  77. package/dist/src/routing/index.js.map +1 -0
  78. package/dist/src/routing/types.d.ts +331 -0
  79. package/dist/src/routing/types.d.ts.map +1 -0
  80. package/dist/src/routing/types.js +203 -0
  81. package/dist/src/routing/types.js.map +1 -0
  82. package/dist/src/scripts/__tests__/scan-imported-skills.test.js +5 -0
  83. package/dist/src/scripts/__tests__/scan-imported-skills.test.js.map +1 -1
  84. package/dist/src/security/SkillSandbox.d.ts +156 -0
  85. package/dist/src/security/SkillSandbox.d.ts.map +1 -0
  86. package/dist/src/security/SkillSandbox.js +303 -0
  87. package/dist/src/security/SkillSandbox.js.map +1 -0
  88. package/dist/src/security/index.d.ts +3 -1
  89. package/dist/src/security/index.d.ts.map +1 -1
  90. package/dist/src/security/index.js +5 -1
  91. package/dist/src/security/index.js.map +1 -1
  92. package/dist/src/security/rate-limiter/presets.d.ts +12 -0
  93. package/dist/src/security/rate-limiter/presets.d.ts.map +1 -1
  94. package/dist/src/security/rate-limiter/presets.js +12 -0
  95. package/dist/src/security/rate-limiter/presets.js.map +1 -1
  96. package/dist/src/security/sanitization.d.ts +85 -0
  97. package/dist/src/security/sanitization.d.ts.map +1 -1
  98. package/dist/src/security/sanitization.js +133 -0
  99. package/dist/src/security/sanitization.js.map +1 -1
  100. package/dist/src/security/scanner/SecurityScanner.d.ts +23 -0
  101. package/dist/src/security/scanner/SecurityScanner.d.ts.map +1 -1
  102. package/dist/src/security/scanner/SecurityScanner.js +232 -28
  103. package/dist/src/security/scanner/SecurityScanner.js.map +1 -1
  104. package/dist/src/security/scanner/patterns.d.ts +13 -0
  105. package/dist/src/security/scanner/patterns.d.ts.map +1 -1
  106. package/dist/src/security/scanner/patterns.js +51 -0
  107. package/dist/src/security/scanner/patterns.js.map +1 -1
  108. package/dist/src/security/scanner/types.d.ts +13 -1
  109. package/dist/src/security/scanner/types.d.ts.map +1 -1
  110. package/dist/src/security/scanner/weights.d.ts.map +1 -1
  111. package/dist/src/security/scanner/weights.js +1 -0
  112. package/dist/src/security/scanner/weights.js.map +1 -1
  113. package/dist/src/session/SessionManager.d.ts +7 -0
  114. package/dist/src/session/SessionManager.d.ts.map +1 -1
  115. package/dist/src/session/SessionManager.js +117 -10
  116. package/dist/src/session/SessionManager.js.map +1 -1
  117. package/dist/src/sync/SyncEngine.d.ts.map +1 -1
  118. package/dist/src/sync/SyncEngine.js +52 -32
  119. package/dist/src/sync/SyncEngine.js.map +1 -1
  120. package/dist/src/testing/MultiLLMProvider.d.ts +374 -0
  121. package/dist/src/testing/MultiLLMProvider.d.ts.map +1 -0
  122. package/dist/src/testing/MultiLLMProvider.js +720 -0
  123. package/dist/src/testing/MultiLLMProvider.js.map +1 -0
  124. package/dist/src/testing/index.d.ts +8 -0
  125. package/dist/src/testing/index.d.ts.map +1 -0
  126. package/dist/src/testing/index.js +9 -0
  127. package/dist/src/testing/index.js.map +1 -0
  128. package/dist/src/types.d.ts +3 -0
  129. package/dist/src/types.d.ts.map +1 -1
  130. package/dist/tests/SecurityScanner.test.js +337 -1
  131. package/dist/tests/SecurityScanner.test.js.map +1 -1
  132. package/dist/tests/billing/BillingService.test.d.ts +7 -0
  133. package/dist/tests/billing/BillingService.test.d.ts.map +1 -0
  134. package/dist/tests/billing/BillingService.test.js +168 -0
  135. package/dist/tests/billing/BillingService.test.js.map +1 -0
  136. package/dist/tests/billing/GDPRCompliance.test.d.ts +7 -0
  137. package/dist/tests/billing/GDPRCompliance.test.d.ts.map +1 -0
  138. package/dist/tests/billing/GDPRCompliance.test.js +195 -0
  139. package/dist/tests/billing/GDPRCompliance.test.js.map +1 -0
  140. package/dist/tests/billing/StripeReconciliation.test.d.ts +7 -0
  141. package/dist/tests/billing/StripeReconciliation.test.d.ts.map +1 -0
  142. package/dist/tests/billing/StripeReconciliation.test.js +266 -0
  143. package/dist/tests/billing/StripeReconciliation.test.js.map +1 -0
  144. package/dist/tests/billing/stripe-validators.test.d.ts +7 -0
  145. package/dist/tests/billing/stripe-validators.test.d.ts.map +1 -0
  146. package/dist/tests/billing/stripe-validators.test.js +107 -0
  147. package/dist/tests/billing/stripe-validators.test.js.map +1 -0
  148. package/dist/tests/embeddings/hnsw-store.test.d.ts +7 -0
  149. package/dist/tests/embeddings/hnsw-store.test.d.ts.map +1 -0
  150. package/dist/tests/embeddings/hnsw-store.test.js +295 -0
  151. package/dist/tests/embeddings/hnsw-store.test.js.map +1 -0
  152. package/dist/tests/integration/neural/e2e-learning.test.d.ts +17 -0
  153. package/dist/tests/integration/neural/e2e-learning.test.d.ts.map +1 -0
  154. package/dist/tests/integration/neural/e2e-learning.test.js +238 -0
  155. package/dist/tests/integration/neural/e2e-learning.test.js.map +1 -0
  156. package/dist/tests/integration/neural/helpers.d.ts +132 -0
  157. package/dist/tests/integration/neural/helpers.d.ts.map +1 -0
  158. package/dist/tests/integration/neural/helpers.js +287 -0
  159. package/dist/tests/integration/neural/helpers.js.map +1 -0
  160. package/dist/tests/integration/neural/personalization.test.d.ts +21 -0
  161. package/dist/tests/integration/neural/personalization.test.d.ts.map +1 -0
  162. package/dist/tests/integration/neural/personalization.test.js +304 -0
  163. package/dist/tests/integration/neural/personalization.test.js.map +1 -0
  164. package/dist/tests/integration/neural/preference-learner.test.d.ts +23 -0
  165. package/dist/tests/integration/neural/preference-learner.test.d.ts.map +1 -0
  166. package/dist/tests/integration/neural/preference-learner.test.js +289 -0
  167. package/dist/tests/integration/neural/preference-learner.test.js.map +1 -0
  168. package/dist/tests/integration/neural/privacy.test.d.ts +19 -0
  169. package/dist/tests/integration/neural/privacy.test.d.ts.map +1 -0
  170. package/dist/tests/integration/neural/privacy.test.js +249 -0
  171. package/dist/tests/integration/neural/privacy.test.js.map +1 -0
  172. package/dist/tests/integration/neural/setup.d.ts +175 -0
  173. package/dist/tests/integration/neural/setup.d.ts.map +1 -0
  174. package/dist/tests/integration/neural/setup.js +487 -0
  175. package/dist/tests/integration/neural/setup.js.map +1 -0
  176. package/dist/tests/integration/neural/signal-collection.test.d.ts +21 -0
  177. package/dist/tests/integration/neural/signal-collection.test.d.ts.map +1 -0
  178. package/dist/tests/integration/neural/signal-collection.test.js +232 -0
  179. package/dist/tests/integration/neural/signal-collection.test.js.map +1 -0
  180. package/dist/tests/learning/PatternStore.test.d.ts +8 -0
  181. package/dist/tests/learning/PatternStore.test.d.ts.map +1 -0
  182. package/dist/tests/learning/PatternStore.test.js +589 -0
  183. package/dist/tests/learning/PatternStore.test.js.map +1 -0
  184. package/dist/tests/learning/ReasoningBankIntegration.test.d.ts +8 -0
  185. package/dist/tests/learning/ReasoningBankIntegration.test.d.ts.map +1 -0
  186. package/dist/tests/learning/ReasoningBankIntegration.test.js +269 -0
  187. package/dist/tests/learning/ReasoningBankIntegration.test.js.map +1 -0
  188. package/dist/tests/routing/SONARouter.test.d.ts +8 -0
  189. package/dist/tests/routing/SONARouter.test.d.ts.map +1 -0
  190. package/dist/tests/routing/SONARouter.test.js +400 -0
  191. package/dist/tests/routing/SONARouter.test.js.map +1 -0
  192. package/dist/tests/security/ContinuousSecurity.test.js +10 -12
  193. package/dist/tests/security/ContinuousSecurity.test.js.map +1 -1
  194. package/dist/tests/security/SkillSandbox.test.d.ts +8 -0
  195. package/dist/tests/security/SkillSandbox.test.d.ts.map +1 -0
  196. package/dist/tests/security/SkillSandbox.test.js +321 -0
  197. package/dist/tests/security/SkillSandbox.test.js.map +1 -0
  198. package/dist/tests/sync/SyncEngine.test.js +4 -2
  199. package/dist/tests/sync/SyncEngine.test.js.map +1 -1
  200. package/dist/tests/testing/MultiLLMProvider.test.d.ts +14 -0
  201. package/dist/tests/testing/MultiLLMProvider.test.d.ts.map +1 -0
  202. package/dist/tests/testing/MultiLLMProvider.test.js +438 -0
  203. package/dist/tests/testing/MultiLLMProvider.test.js.map +1 -0
  204. package/package.json +16 -3
@@ -0,0 +1,805 @@
1
+ /**
2
+ * SMI-1519: HNSW Embedding Store
3
+ *
4
+ * High-performance vector storage using HNSW (Hierarchical Navigable Small World)
5
+ * index for fast approximate nearest neighbor (ANN) search.
6
+ *
7
+ * Features:
8
+ * - O(log n) similarity search vs O(n) brute-force (150x faster)
9
+ * - SQLite for metadata persistence (skill_id, text, created_at)
10
+ * - Graceful fallback to brute-force if HNSW unavailable
11
+ * - Compatible with existing EmbeddingService interface
12
+ * - Uses claude-flow V3 VectorDB API with automatic fallback
13
+ *
14
+ * Enable via environment variable: SKILLSMITH_USE_HNSW=true
15
+ *
16
+ * @see ADR-009: Embedding Service Fallback Strategy
17
+ */
18
+ import Database from 'better-sqlite3';
19
+ // ============================================================================
20
+ // Default Configuration
21
+ // ============================================================================
/**
 * Baseline HNSW parameters used when the caller supplies no overrides.
 *
 * Aimed at roughly 10k-100k skill embeddings, balancing search speed
 * against recall.
 */
export const DEFAULT_HNSW_CONFIG = {
    // Graph connectivity: links kept per node
    m: 16,
    // Candidate list size while building the index
    efConstruction: 200,
    // Candidate list size while querying the index
    efSearch: 100,
    // Vector width of all-MiniLM-L6-v2 embeddings
    dimensions: 384,
};
/**
 * Ready-made HNSW configurations keyed by expected dataset size.
 *
 * - small:  fast search, lower memory, suitable for <10k vectors
 * - medium: balanced performance, suitable for 10k-100k vectors
 * - large:  high recall, suitable for 100k-1M vectors
 * - xlarge: maximum recall, suitable for >1M vectors or critical accuracy
 */
export const HNSW_PRESETS = (() => {
    // Every preset targets the same embedding width
    const PRESET_DIMENSIONS = 384;
    // Build one preset object; key order matches the default configuration
    const makePreset = (m, efConstruction, efSearch) => ({
        m,
        efConstruction,
        efSearch,
        dimensions: PRESET_DIMENSIONS,
    });
    return {
        small: makePreset(8, 100, 50),
        medium: makePreset(16, 200, 100),
        large: makePreset(32, 400, 150),
        xlarge: makePreset(48, 500, 200),
    };
})();
65
+ // ============================================================================
66
+ // Class Skeleton
67
+ // ============================================================================
68
+ /**
69
+ * High-performance embedding storage using HNSW index.
70
+ *
71
+ * Provides O(log n) approximate nearest neighbor search while maintaining
72
+ * compatibility with the existing EmbeddingService interface.
73
+ *
74
+ * @example
75
+ * ```typescript
76
+ * // Basic usage
77
+ * const store = new HNSWEmbeddingStore({
78
+ * dbPath: './embeddings.db',
79
+ * indexPath: './embeddings.hnsw',
80
+ * });
81
+ *
82
+ * // Store embeddings
83
+ * store.storeEmbedding('skill-1', embedding1, 'Jest testing framework helper');
84
+ * store.storeEmbedding('skill-2', embedding2, 'Vitest testing utilities');
85
+ *
86
+ * // Find similar
87
+ * const results = store.findSimilar(queryEmbedding, 10);
88
+ * // [{ skillId: 'skill-1', score: 0.95 }, { skillId: 'skill-2', score: 0.87 }, ...]
89
+ *
90
+ * // Clean up
91
+ * store.close();
92
+ * ```
93
+ *
94
+ * @example
95
+ * ```typescript
96
+ * // With custom HNSW config for large dataset
97
+ * const store = new HNSWEmbeddingStore({
98
+ * dbPath: './embeddings.db',
99
+ * indexPath: './embeddings.hnsw',
100
+ * hnswConfig: HNSW_PRESETS.large,
101
+ * maxElements: 500000,
102
+ * });
103
+ * ```
104
+ */
105
+ export class HNSWEmbeddingStore {
106
+ // -------------------------------------------------------------------------
107
+ // Private Fields
108
+ // -------------------------------------------------------------------------
109
+ /** SQLite database for metadata */
110
+ db = null;
111
+ /** HNSW index instance (from hnswlib-node) */
112
+ index = null;
113
+ /** Whether HNSW is enabled (false = brute-force fallback) */
114
+ hnswEnabled;
115
+ /** Merged HNSW configuration */
116
+ config;
117
+ /** Maximum index capacity */
118
+ maxElements;
119
+ /** Path to HNSW index file */
120
+ indexPath;
121
+ /** Distance metric */
122
+ distanceMetric;
123
+ /** Auto-save flag */
124
+ autoSave;
125
+ /** Map of skill IDs to internal HNSW labels (for reverse lookup) */
126
+ skillIdToLabel = new Map();
127
+ /** Map of HNSW labels to skill IDs */
128
+ labelToSkillId = new Map();
129
+ /** Next available label for HNSW insertion */
130
+ nextLabel = 0;
131
+ // -------------------------------------------------------------------------
132
+ // Constructor
133
+ // -------------------------------------------------------------------------
134
+ /**
135
+ * Create a new HNSWEmbeddingStore instance.
136
+ *
137
+ * @param options - Configuration options
138
+ *
139
+ * @example
140
+ * ```typescript
141
+ * // Default configuration (auto-detects HNSW availability)
142
+ * const store = new HNSWEmbeddingStore();
143
+ *
144
+ * // With persistence
145
+ * const store = new HNSWEmbeddingStore({
146
+ * dbPath: './embeddings.db',
147
+ * indexPath: './embeddings.hnsw',
148
+ * });
149
+ *
150
+ * // Force brute-force fallback
151
+ * const store = new HNSWEmbeddingStore({ useHNSW: false });
152
+ * ```
153
+ */
154
+ /** V3 VectorDB instance (if initialized) */
155
+ vectorDB = null;
156
+ /** Promise for async initialization */
157
+ initPromise = null;
158
+ constructor(options = {}) {
159
+ // Determine HNSW mode from options or environment
160
+ this.hnswEnabled = this.shouldUseHNSW(options.useHNSW);
161
+ // Merge configuration with defaults
162
+ this.config = {
163
+ ...DEFAULT_HNSW_CONFIG,
164
+ ...options.hnswConfig,
165
+ };
166
+ this.maxElements = options.maxElements ?? 100000;
167
+ this.indexPath = options.indexPath;
168
+ this.distanceMetric = options.distanceMetric ?? 'cosine';
169
+ this.autoSave = options.autoSave ?? false;
170
+ // Initialize SQLite database
171
+ if (options.dbPath) {
172
+ this.initDatabase(options.dbPath);
173
+ }
174
+ // Initialize HNSW index asynchronously (if enabled)
175
+ if (this.hnswEnabled) {
176
+ this.initPromise = this.initHNSWIndex();
177
+ }
178
+ }
179
+ /**
180
+ * Ensure the store is fully initialized.
181
+ * Call this before operations that require the HNSW index.
182
+ */
183
+ async ensureInitialized() {
184
+ if (this.initPromise) {
185
+ await this.initPromise;
186
+ }
187
+ }
188
+ // -------------------------------------------------------------------------
189
+ // Public Methods (IEmbeddingStore Interface)
190
+ // -------------------------------------------------------------------------
191
+ /**
192
+ * Store an embedding with its metadata.
193
+ *
194
+ * Inserts the vector into both HNSW index (for fast search) and
195
+ * SQLite (for metadata persistence).
196
+ *
197
+ * @param skillId - Unique identifier for the skill
198
+ * @param embedding - Vector embedding (must match configured dimensions)
199
+ * @param text - Original text that was embedded
200
+ * @throws Error if embedding dimensions don't match configuration
201
+ */
202
+ storeEmbedding(skillId, embedding, text) {
203
+ // Validate embedding dimensions
204
+ if (embedding.length !== this.config.dimensions) {
205
+ throw new Error(`Embedding dimension mismatch: got ${embedding.length}, expected ${this.config.dimensions}`);
206
+ }
207
+ // Store in SQLite (metadata + embedding blob for brute-force fallback)
208
+ if (this.db) {
209
+ const buffer = Buffer.from(embedding.buffer);
210
+ const stmt = this.db.prepare(`
211
+ INSERT OR REPLACE INTO skill_embeddings (skill_id, embedding, text, created_at)
212
+ VALUES (?, ?, ?, unixepoch())
213
+ `);
214
+ stmt.run(skillId, buffer, text);
215
+ }
216
+ // Insert into V3 VectorDB (HNSW index)
217
+ if (this.vectorDB) {
218
+ // VectorDB.insert may be sync or async depending on backend
219
+ const result = this.vectorDB.insert(embedding, skillId, { text });
220
+ if (result instanceof Promise) {
221
+ // Fire and forget for sync interface, but log errors
222
+ result.catch((err) => {
223
+ console.warn(`[HNSWEmbeddingStore] Failed to insert into VectorDB: ${err}`);
224
+ });
225
+ }
226
+ }
227
+ }
228
+ /**
229
+ * Retrieve a stored embedding by skill ID.
230
+ *
231
+ * @param skillId - Unique identifier for the skill
232
+ * @returns The embedding if found, null otherwise
233
+ */
234
+ getEmbedding(skillId) {
235
+ if (!this.db)
236
+ return null;
237
+ const stmt = this.db.prepare(`
238
+ SELECT embedding FROM skill_embeddings WHERE skill_id = ?
239
+ `);
240
+ const row = stmt.get(skillId);
241
+ if (!row)
242
+ return null;
243
+ return new Float32Array(row.embedding.buffer.slice(row.embedding.byteOffset, row.embedding.byteOffset + row.embedding.byteLength));
244
+ }
245
+ /**
246
+ * Get all stored embeddings.
247
+ *
248
+ * Note: For large datasets, consider using findSimilar() instead
249
+ * to avoid loading all vectors into memory.
250
+ *
251
+ * @returns Map of skill IDs to their embeddings
252
+ */
253
+ getAllEmbeddings() {
254
+ if (!this.db)
255
+ return new Map();
256
+ const stmt = this.db.prepare(`
257
+ SELECT skill_id, embedding FROM skill_embeddings
258
+ `);
259
+ const rows = stmt.all();
260
+ const result = new Map();
261
+ for (const row of rows) {
262
+ const embedding = new Float32Array(row.embedding.buffer.slice(row.embedding.byteOffset, row.embedding.byteOffset + row.embedding.byteLength));
263
+ result.set(row.skill_id, embedding);
264
+ }
265
+ return result;
266
+ }
267
+ /**
268
+ * Find most similar embeddings to a query vector.
269
+ *
270
+ * Uses HNSW for O(log n) approximate search when available,
271
+ * falls back to O(n) brute-force cosine similarity otherwise.
272
+ *
273
+ * @param queryEmbedding - Query vector (must match configured dimensions)
274
+ * @param topK - Number of results to return (default: 10)
275
+ * @returns Array of skill IDs with similarity scores, sorted descending
276
+ */
277
+ findSimilar(queryEmbedding, topK = 10) {
278
+ // Validate query dimensions
279
+ if (queryEmbedding.length !== this.config.dimensions) {
280
+ throw new Error(`Query dimension mismatch: got ${queryEmbedding.length}, expected ${this.config.dimensions}`);
281
+ }
282
+ // Try HNSW search first (if available)
283
+ if (this.vectorDB) {
284
+ try {
285
+ // VectorDB.search may be sync or async
286
+ const searchResult = this.vectorDB.search(queryEmbedding, topK);
287
+ // Handle async case by returning empty and logging
288
+ // (sync interface limitation - caller should use findSimilarAsync for async)
289
+ if (searchResult instanceof Promise) {
290
+ // For sync interface, fall back to brute-force
291
+ console.warn('[HNSWEmbeddingStore] VectorDB.search returned Promise, using brute-force fallback');
292
+ }
293
+ else {
294
+ // Convert VectorDB results to SimilarityResult format
295
+ return searchResult.map((result) => ({
296
+ skillId: result.id,
297
+ score: result.score,
298
+ }));
299
+ }
300
+ }
301
+ catch (err) {
302
+ console.warn(`[HNSWEmbeddingStore] HNSW search failed, falling back to brute-force: ${err}`);
303
+ }
304
+ }
305
+ // Brute-force fallback: compute cosine similarity for all embeddings
306
+ const allEmbeddings = this.getAllEmbeddings();
307
+ const results = [];
308
+ for (const [skillId, embedding] of allEmbeddings) {
309
+ const score = this.cosineSimilarity(queryEmbedding, embedding);
310
+ results.push({ skillId, score });
311
+ }
312
+ // Sort by similarity score descending and return topK
313
+ results.sort((a, b) => b.score - a.score);
314
+ return results.slice(0, topK);
315
+ }
316
+ /**
317
+ * Async version of findSimilar for backends that require async search.
318
+ *
319
+ * @param queryEmbedding - Query vector (must match configured dimensions)
320
+ * @param topK - Number of results to return (default: 10)
321
+ * @returns Promise resolving to array of skill IDs with similarity scores
322
+ */
323
+ async findSimilarAsync(queryEmbedding, topK = 10) {
324
+ // Ensure HNSW is initialized
325
+ await this.ensureInitialized();
326
+ // Validate query dimensions
327
+ if (queryEmbedding.length !== this.config.dimensions) {
328
+ throw new Error(`Query dimension mismatch: got ${queryEmbedding.length}, expected ${this.config.dimensions}`);
329
+ }
330
+ // Try HNSW search first (if available)
331
+ if (this.vectorDB) {
332
+ try {
333
+ const searchResult = this.vectorDB.search(queryEmbedding, topK);
334
+ const results = searchResult instanceof Promise ? await searchResult : searchResult;
335
+ return results.map((result) => ({
336
+ skillId: result.id,
337
+ score: result.score,
338
+ }));
339
+ }
340
+ catch (err) {
341
+ console.warn(`[HNSWEmbeddingStore] HNSW search failed, falling back to brute-force: ${err}`);
342
+ }
343
+ }
344
+ // Brute-force fallback
345
+ return this.findSimilar(queryEmbedding, topK);
346
+ }
347
+ /**
348
+ * Compute cosine similarity between two embeddings.
349
+ *
350
+ * @param a - First embedding
351
+ * @param b - Second embedding
352
+ * @returns Similarity score between -1 and 1
353
+ * @throws Error if embeddings have different dimensions
354
+ */
355
+ cosineSimilarity(a, b) {
356
+ if (a.length !== b.length) {
357
+ throw new Error(`Embedding dimension mismatch: ${a.length} vs ${b.length}. ` +
358
+ `Expected ${this.config.dimensions}.`);
359
+ }
360
+ let dotProduct = 0;
361
+ let normA = 0;
362
+ let normB = 0;
363
+ for (let i = 0; i < a.length; i++) {
364
+ dotProduct += a[i] * b[i];
365
+ normA += a[i] * a[i];
366
+ normB += b[i] * b[i];
367
+ }
368
+ if (normA === 0 || normB === 0)
369
+ return 0;
370
+ return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
371
+ }
372
+ /**
373
+ * Check if running in fallback (brute-force) mode.
374
+ *
375
+ * @returns true if using brute-force, false if using HNSW
376
+ */
377
+ isUsingFallback() {
378
+ return !this.hnswEnabled || this.index === null;
379
+ }
380
+ /**
381
+ * Close database connections and release resources.
382
+ *
383
+ * Saves HNSW index to disk if indexPath was configured.
384
+ * Safe to call multiple times.
385
+ */
386
+ close() {
387
+ // TODO: SMI-1519 - Implement
388
+ // 1. Save HNSW index if indexPath configured
389
+ // 2. Close SQLite database
390
+ // 3. Clear label mappings
391
+ if (this.db) {
392
+ this.db.close();
393
+ this.db = null;
394
+ }
395
+ }
396
+ // -------------------------------------------------------------------------
397
+ // Extended Public Methods (HNSW-specific)
398
+ // -------------------------------------------------------------------------
399
+ /**
400
+ * Get statistics about the HNSW index.
401
+ *
402
+ * @returns Index statistics including capacity, utilization, and config
403
+ */
404
+ getStats() {
405
+ let vectorCount = 0;
406
+ // Get count from SQLite
407
+ if (this.db) {
408
+ const stmt = this.db.prepare('SELECT COUNT(*) as count FROM skill_embeddings');
409
+ const row = stmt.get();
410
+ vectorCount = row.count;
411
+ }
412
+ // Get count from VectorDB if available
413
+ if (this.vectorDB) {
414
+ try {
415
+ const size = this.vectorDB.size();
416
+ if (!(size instanceof Promise)) {
417
+ vectorCount = Math.max(vectorCount, size);
418
+ }
419
+ }
420
+ catch {
421
+ // Ignore errors, use SQLite count
422
+ }
423
+ }
424
+ const utilizationPercent = this.maxElements > 0 ? (vectorCount / this.maxElements) * 100 : 0;
425
+ // Estimate memory usage (rough approximation)
426
+ // HNSW uses ~(4 * dimensions + M * 4 * 2) bytes per vector
427
+ const bytesPerVector = 4 * this.config.dimensions + this.config.m * 8;
428
+ const memoryUsageBytes = vectorCount * bytesPerVector;
429
+ return {
430
+ vectorCount,
431
+ maxCapacity: this.maxElements,
432
+ utilizationPercent: Math.round(utilizationPercent * 100) / 100,
433
+ m: this.config.m,
434
+ efConstruction: this.config.efConstruction,
435
+ efSearch: this.config.efSearch,
436
+ dimensions: this.config.dimensions,
437
+ memoryUsageBytes,
438
+ isHNSWEnabled: this.vectorDB !== null,
439
+ indexPath: this.indexPath,
440
+ };
441
+ }
442
+ /**
443
+ * Batch insert multiple embeddings efficiently.
444
+ *
445
+ * More efficient than calling storeEmbedding() in a loop
446
+ * due to batched SQLite transactions and HNSW insertions.
447
+ *
448
+ * @param embeddings - Array of embeddings to insert
449
+ * @returns Batch operation result with counts and timing
450
+ */
451
+ batchInsert(embeddings) {
452
+ const startTime = Date.now();
453
+ const result = {
454
+ inserted: 0,
455
+ updated: 0,
456
+ failed: 0,
457
+ errors: [],
458
+ durationMs: 0,
459
+ };
460
+ if (!this.db) {
461
+ result.errors.push({ skillId: '*', error: 'Database not initialized' });
462
+ result.durationMs = Date.now() - startTime;
463
+ return result;
464
+ }
465
+ // Use a transaction for batch SQLite operations
466
+ const insertStmt = this.db.prepare(`
467
+ INSERT OR REPLACE INTO skill_embeddings (skill_id, embedding, text, created_at)
468
+ VALUES (?, ?, ?, unixepoch())
469
+ `);
470
+ const checkStmt = this.db.prepare(`
471
+ SELECT 1 FROM skill_embeddings WHERE skill_id = ?
472
+ `);
473
+ const transaction = this.db.transaction(() => {
474
+ for (const { skillId, embedding, text } of embeddings) {
475
+ try {
476
+ // Validate dimensions
477
+ if (embedding.length !== this.config.dimensions) {
478
+ result.failed++;
479
+ result.errors.push({
480
+ skillId,
481
+ error: `Dimension mismatch: got ${embedding.length}, expected ${this.config.dimensions}`,
482
+ });
483
+ continue;
484
+ }
485
+ // Check if exists (for updated count)
486
+ const exists = checkStmt.get(skillId);
487
+ // Insert into SQLite
488
+ const buffer = Buffer.from(embedding.buffer);
489
+ insertStmt.run(skillId, buffer, text);
490
+ // Insert into VectorDB
491
+ if (this.vectorDB) {
492
+ try {
493
+ this.vectorDB.insert(embedding, skillId, { text });
494
+ }
495
+ catch (err) {
496
+ // Log but don't fail - SQLite is the source of truth
497
+ console.warn(`[HNSWEmbeddingStore] VectorDB insert failed for ${skillId}: ${err}`);
498
+ }
499
+ }
500
+ if (exists) {
501
+ result.updated++;
502
+ }
503
+ else {
504
+ result.inserted++;
505
+ }
506
+ }
507
+ catch (err) {
508
+ result.failed++;
509
+ result.errors.push({
510
+ skillId,
511
+ error: err instanceof Error ? err.message : String(err),
512
+ });
513
+ }
514
+ }
515
+ });
516
+ transaction();
517
+ result.durationMs = Date.now() - startTime;
518
+ return result;
519
+ }
520
+ /**
521
+ * Remove an embedding from the store.
522
+ *
523
+ * Note: HNSW does not support true deletion. The vector is marked
524
+ * as deleted and excluded from search results, but memory is not
525
+ * reclaimed until the index is rebuilt.
526
+ *
527
+ * @param skillId - Unique identifier for the skill to remove
528
+ * @returns true if removed, false if not found
529
+ */
530
+ removeEmbedding(skillId) {
531
+ let removed = false;
532
+ // Remove from SQLite
533
+ if (this.db) {
534
+ const stmt = this.db.prepare('DELETE FROM skill_embeddings WHERE skill_id = ?');
535
+ const result = stmt.run(skillId);
536
+ removed = result.changes > 0;
537
+ }
538
+ // Remove from VectorDB (if supported)
539
+ if (this.vectorDB && removed) {
540
+ try {
541
+ const vdbResult = this.vectorDB.remove(skillId);
542
+ // VectorDB.remove may be sync or async
543
+ if (vdbResult instanceof Promise) {
544
+ vdbResult.catch((err) => {
545
+ console.warn(`[HNSWEmbeddingStore] VectorDB remove failed for ${skillId}: ${err}`);
546
+ });
547
+ }
548
+ }
549
+ catch (err) {
550
+ // Log but don't fail - SQLite is the source of truth
551
+ console.warn(`[HNSWEmbeddingStore] VectorDB remove failed for ${skillId}: ${err}`);
552
+ }
553
+ }
554
+ return removed;
555
+ }
556
+ /**
557
+ * Save the HNSW index to disk.
558
+ *
559
+ * Note: V3 VectorDB manages its own persistence, so this is a no-op
560
+ * unless using hnswlib-node directly.
561
+ *
562
+ * @throws Error if indexPath was not configured
563
+ */
564
+ saveIndex() {
565
+ if (!this.indexPath) {
566
+ throw new Error('Cannot save index: indexPath not configured');
567
+ }
568
+ // V3 VectorDB handles its own persistence
569
+ // For hnswlib-node, we would call index.saveIndex(this.indexPath)
570
+ console.log(`[HNSWEmbeddingStore] Index persistence managed by V3 VectorDB backend`);
571
+ }
572
+ /**
573
+ * Load the HNSW index from disk.
574
+ *
575
+ * Note: V3 VectorDB manages its own persistence, so this is a no-op
576
+ * unless using hnswlib-node directly.
577
+ *
578
+ * @throws Error if indexPath was not configured
579
+ */
580
+ loadIndex() {
581
+ if (!this.indexPath) {
582
+ throw new Error('Cannot load index: indexPath not configured');
583
+ }
584
+ // V3 VectorDB handles its own persistence
585
+ // For hnswlib-node, we would call index.loadIndex(this.indexPath)
586
+ console.log(`[HNSWEmbeddingStore] Index persistence managed by V3 VectorDB backend`);
587
+ }
588
+ /**
589
+ * Rebuild the HNSW index from SQLite data.
590
+ *
591
+ * Useful after many deletions to reclaim memory, or to apply
592
+ * new HNSW configuration parameters.
593
+ *
594
+ * @param newConfig - Optional new HNSW configuration
595
+ */
596
+ async rebuildIndex(newConfig) {
597
+ // Update config if provided
598
+ if (newConfig) {
599
+ Object.assign(this.config, newConfig);
600
+ }
601
+ // Clear existing VectorDB
602
+ if (this.vectorDB) {
603
+ try {
604
+ const clearResult = this.vectorDB.clear();
605
+ if (clearResult instanceof Promise) {
606
+ await clearResult;
607
+ }
608
+ }
609
+ catch (err) {
610
+ console.warn(`[HNSWEmbeddingStore] Failed to clear VectorDB: ${err}`);
611
+ }
612
+ }
613
+ // Reinitialize VectorDB
614
+ await this.initHNSWIndex();
615
+ // Re-insert all embeddings from SQLite
616
+ if (this.db && this.vectorDB) {
617
+ const allEmbeddings = this.getAllEmbeddings();
618
+ for (const [skillId, embedding] of allEmbeddings) {
619
+ try {
620
+ const result = this.vectorDB.insert(embedding, skillId);
621
+ if (result instanceof Promise) {
622
+ await result;
623
+ }
624
+ }
625
+ catch (err) {
626
+ console.warn(`[HNSWEmbeddingStore] Failed to reinsert ${skillId}: ${err}`);
627
+ }
628
+ }
629
+ }
630
+ }
631
+ /**
632
+ * Update efSearch parameter at runtime.
633
+ *
634
+ * Note: V3 VectorDB does not expose efSearch tuning directly.
635
+ * This method is provided for API compatibility.
636
+ *
637
+ * @param efSearch - New efSearch value (must be > 0)
638
+ */
639
+ setEfSearch(efSearch) {
640
+ if (efSearch <= 0) {
641
+ throw new Error('efSearch must be > 0');
642
+ }
643
+ this.config.efSearch = efSearch;
644
+ // V3 VectorDB doesn't expose efSearch tuning
645
+ // For hnswlib-node, we would call index.setEfSearch(efSearch)
646
+ console.log(`[HNSWEmbeddingStore] efSearch updated to ${efSearch} (will apply on next search)`);
647
+ }
648
+ // -------------------------------------------------------------------------
649
+ // Private Methods
650
+ // -------------------------------------------------------------------------
651
+ /**
652
+ * Determine whether to use HNSW based on explicit option or environment.
653
+ */
654
+ shouldUseHNSW(explicit) {
655
+ if (explicit !== undefined) {
656
+ return explicit;
657
+ }
658
+ // Check environment variable
659
+ const envValue = process.env.SKILLSMITH_USE_HNSW;
660
+ if (envValue !== undefined) {
661
+ return envValue === 'true' || envValue === '1';
662
+ }
663
+ // Default to false (use brute-force) for backward compatibility
664
+ // TODO: Consider changing default to true in future version
665
+ return false;
666
+ }
667
+ /**
668
+ * Initialize SQLite database and create tables.
669
+ */
670
+ initDatabase(dbPath) {
671
+ this.db = new Database(dbPath);
672
+ // Create skill_embeddings table
673
+ this.db.exec(`
674
+ CREATE TABLE IF NOT EXISTS skill_embeddings (
675
+ skill_id TEXT PRIMARY KEY,
676
+ embedding BLOB NOT NULL,
677
+ text TEXT NOT NULL,
678
+ created_at INTEGER DEFAULT (unixepoch())
679
+ )
680
+ `);
681
+ // Create index for fast lookups
682
+ this.db.exec(`
683
+ CREATE INDEX IF NOT EXISTS idx_skill_embeddings_id
684
+ ON skill_embeddings(skill_id)
685
+ `);
686
+ }
687
+ /**
688
+ * Initialize HNSW index using V3 VectorDB API.
689
+ * Falls back gracefully if V3 is unavailable.
690
+ */
691
+ async initHNSWIndex() {
692
+ try {
693
+ // Dynamically import V3 VectorDB module
694
+ const vectorDbModule = await import('claude-flow/v3/@claude-flow/cli/dist/src/ruvector/vector-db.js');
695
+ // Load ruvector backend (may use WASM acceleration)
696
+ const loaded = await vectorDbModule.loadRuVector();
697
+ if (!loaded) {
698
+ console.warn('[HNSWEmbeddingStore] ruvector not available, using fallback backend');
699
+ }
700
+ // Create VectorDB instance
701
+ this.vectorDB = await vectorDbModule.createVectorDB(this.config.dimensions);
702
+ // Log status
703
+ const status = vectorDbModule.getStatus();
704
+ console.log(`[HNSWEmbeddingStore] Initialized with backend: ${status.backend}` +
705
+ (status.wasmAccelerated ? ' (WASM accelerated)' : ''));
706
+ // Re-populate VectorDB from SQLite if we have existing data
707
+ if (this.db) {
708
+ const count = this.db.prepare('SELECT COUNT(*) as c FROM skill_embeddings').get();
709
+ if (count.c > 0) {
710
+ console.log(`[HNSWEmbeddingStore] Rebuilding index from ${count.c} existing embeddings...`);
711
+ const allEmbeddings = this.getAllEmbeddings();
712
+ for (const [skillId, embedding] of allEmbeddings) {
713
+ try {
714
+ const result = this.vectorDB.insert(embedding, skillId);
715
+ if (result instanceof Promise) {
716
+ await result;
717
+ }
718
+ }
719
+ catch (err) {
720
+ console.warn(`[HNSWEmbeddingStore] Failed to insert ${skillId}: ${err}`);
721
+ }
722
+ }
723
+ console.log(`[HNSWEmbeddingStore] Index rebuilt with ${allEmbeddings.size} vectors`);
724
+ }
725
+ }
726
+ }
727
+ catch (err) {
728
+ // V3 VectorDB not available - will use brute-force fallback
729
+ console.warn(`[HNSWEmbeddingStore] Failed to initialize V3 VectorDB, using brute-force fallback: ${err}`);
730
+ this.vectorDB = null;
731
+ }
732
+ }
733
+ /**
734
+ * Convert HNSW distance to similarity score.
735
+ * HNSW returns distances, we need similarities (higher = more similar).
736
+ */
737
+ distanceToSimilarity(distance) {
738
+ // For cosine space, HNSW returns 1 - cosine_similarity
739
+ // So similarity = 1 - distance
740
+ if (this.distanceMetric === 'cosine') {
741
+ return 1 - distance;
742
+ }
743
+ // For L2/IP, need different conversion
744
+ // TODO: Implement for other metrics
745
+ return 1 / (1 + distance);
746
+ }
747
+ }
748
+ // ============================================================================
749
+ // Factory Functions
750
+ // ============================================================================
751
+ /**
752
+ * Create an HNSWEmbeddingStore with a preset configuration.
753
+ *
754
+ * @param preset - Preset name ('small', 'medium', 'large', 'xlarge')
755
+ * @param options - Additional store options; any `hnswConfig` given here is replaced by the preset's configuration
756
+ * @returns Configured HNSWEmbeddingStore instance
757
+ *
758
+ * @example
759
+ * ```typescript
760
+ * const store = createHNSWStore('large', {
761
+ * dbPath: './embeddings.db',
762
+ * indexPath: './embeddings.hnsw',
763
+ * });
764
+ * ```
765
+ */
766
export function createHNSWStore(preset, options = {}) {
    // Copy the caller's options, then let the preset's HNSW parameters
    // overwrite any hnswConfig the caller supplied.
    const storeOptions = { ...options };
    storeOptions.hnswConfig = HNSW_PRESETS[preset];
    return new HNSWEmbeddingStore(storeOptions);
}
772
+ /**
773
+ * Check if hnswlib-node is available.
774
+ *
775
+ * Useful for conditional logic or graceful degradation.
776
+ *
777
+ * @returns true if hnswlib-node can be loaded
778
+ */
779
export async function isHNSWAvailable() {
    // A non-literal specifier keeps TypeScript and bundlers from resolving the
    // optional dependency at build time, without generating code from a string
    // via the Function constructor (which fails when string code generation is
    // disallowed and would then misreport the module as unavailable).
    const optionalModule = 'hnswlib-node';
    try {
        await import(/* webpackIgnore: true */ /* @vite-ignore */ optionalModule);
        return true;
    }
    catch {
        // Any load failure (not installed, native build missing, ...) means "unavailable".
        return false;
    }
}
789
+ /**
790
+ * Dynamically load hnswlib-node module.
791
+ *
792
+ * @returns The loaded hnswlib-node module namespace, or null if unavailable
793
+ * @internal
794
+ */
795
export async function loadHNSWLib() {
    // A non-literal specifier keeps TypeScript and bundlers from resolving the
    // optional dependency at build time, without generating code from a string
    // via the Function constructor (which fails when string code generation is
    // disallowed).
    const optionalModule = 'hnswlib-node';
    try {
        // Resolves to the module namespace object of hnswlib-node.
        return await import(/* webpackIgnore: true */ /* @vite-ignore */ optionalModule);
    }
    catch {
        // Not installed or failed to load: callers treat null as "unavailable".
        return null;
    }
}
805
+ //# sourceMappingURL=hnsw-store.js.map