@skillsmith/core 0.5.8 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/.tsbuildinfo +1 -1
- package/dist/src/audit/exclusions.d.ts +67 -0
- package/dist/src/audit/exclusions.d.ts.map +1 -0
- package/dist/src/audit/exclusions.js +133 -0
- package/dist/src/audit/exclusions.js.map +1 -0
- package/dist/src/audit/exclusions.types.d.ts +45 -0
- package/dist/src/audit/exclusions.types.d.ts.map +1 -0
- package/dist/src/audit/exclusions.types.js +11 -0
- package/dist/src/audit/exclusions.types.js.map +1 -0
- package/dist/src/audit/index.d.ts +15 -0
- package/dist/src/audit/index.d.ts.map +1 -0
- package/dist/src/audit/index.js +14 -0
- package/dist/src/audit/index.js.map +1 -0
- package/dist/src/benchmarks/embeddingBenchmark.d.ts.map +1 -1
- package/dist/src/benchmarks/embeddingBenchmark.js +5 -2
- package/dist/src/benchmarks/embeddingBenchmark.js.map +1 -1
- package/dist/src/config/audit-mode.d.ts +71 -0
- package/dist/src/config/audit-mode.d.ts.map +1 -0
- package/dist/src/config/audit-mode.js +69 -0
- package/dist/src/config/audit-mode.js.map +1 -0
- package/dist/src/config/index.d.ts +13 -0
- package/dist/src/config/index.d.ts.map +1 -1
- package/dist/src/config/index.js +24 -0
- package/dist/src/config/index.js.map +1 -1
- package/dist/src/db/migration-runner.d.ts +9 -2
- package/dist/src/db/migration-runner.d.ts.map +1 -1
- package/dist/src/db/migration-runner.js +36 -3
- package/dist/src/db/migration-runner.js.map +1 -1
- package/dist/src/db/migration.d.ts.map +1 -1
- package/dist/src/db/migration.js +9 -1
- package/dist/src/db/migration.js.map +1 -1
- package/dist/src/db/migrations/v16-skill-source.d.ts +41 -0
- package/dist/src/db/migrations/v16-skill-source.d.ts.map +1 -0
- package/dist/src/db/migrations/v16-skill-source.js +87 -0
- package/dist/src/db/migrations/v16-skill-source.js.map +1 -0
- package/dist/src/db/migrations/v17-curated-trust-tier.d.ts +44 -0
- package/dist/src/db/migrations/v17-curated-trust-tier.d.ts.map +1 -0
- package/dist/src/db/migrations/v17-curated-trust-tier.js +89 -0
- package/dist/src/db/migrations/v17-curated-trust-tier.js.map +1 -0
- package/dist/src/db/schema-sql.d.ts +1 -1
- package/dist/src/db/schema-sql.d.ts.map +1 -1
- package/dist/src/db/schema-sql.js +2 -1
- package/dist/src/db/schema-sql.js.map +1 -1
- package/dist/src/db/schema.d.ts +9 -3
- package/dist/src/db/schema.d.ts.map +1 -1
- package/dist/src/db/schema.js +13 -2
- package/dist/src/db/schema.js.map +1 -1
- package/dist/src/embeddings/embedding-utils.d.ts +23 -0
- package/dist/src/embeddings/embedding-utils.d.ts.map +1 -1
- package/dist/src/embeddings/embedding-utils.js +39 -0
- package/dist/src/embeddings/embedding-utils.js.map +1 -1
- package/dist/src/embeddings/hnsw-search.d.ts +133 -0
- package/dist/src/embeddings/hnsw-search.d.ts.map +1 -0
- package/dist/src/embeddings/hnsw-search.js +387 -0
- package/dist/src/embeddings/hnsw-search.js.map +1 -0
- package/dist/src/embeddings/hnsw-store.d.ts +9 -3
- package/dist/src/embeddings/hnsw-store.d.ts.map +1 -1
- package/dist/src/embeddings/hnsw-store.js +17 -7
- package/dist/src/embeddings/hnsw-store.js.map +1 -1
- package/dist/src/embeddings/hnsw-store.types.d.ts +17 -4
- package/dist/src/embeddings/hnsw-store.types.d.ts.map +1 -1
- package/dist/src/embeddings/hnsw-store.types.js.map +1 -1
- package/dist/src/embeddings/index.d.ts +50 -4
- package/dist/src/embeddings/index.d.ts.map +1 -1
- package/dist/src/embeddings/index.js +166 -24
- package/dist/src/embeddings/index.js.map +1 -1
- package/dist/src/exports/services.d.ts +1 -1
- package/dist/src/exports/services.d.ts.map +1 -1
- package/dist/src/exports/services.js.map +1 -1
- package/dist/src/index.d.ts +4 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +5 -1
- package/dist/src/index.js.map +1 -1
- package/dist/src/indexer/SkillParser.d.ts +7 -70
- package/dist/src/indexer/SkillParser.d.ts.map +1 -1
- package/dist/src/indexer/SkillParser.helpers.d.ts +13 -0
- package/dist/src/indexer/SkillParser.helpers.d.ts.map +1 -0
- package/dist/src/indexer/SkillParser.helpers.js +90 -0
- package/dist/src/indexer/SkillParser.helpers.js.map +1 -0
- package/dist/src/indexer/SkillParser.js +5 -82
- package/dist/src/indexer/SkillParser.js.map +1 -1
- package/dist/src/indexer/SkillParser.types.d.ts +78 -0
- package/dist/src/indexer/SkillParser.types.d.ts.map +1 -0
- package/dist/src/indexer/SkillParser.types.js +9 -0
- package/dist/src/indexer/SkillParser.types.js.map +1 -0
- package/dist/src/install/fan-out.d.ts +81 -0
- package/dist/src/install/fan-out.d.ts.map +1 -0
- package/dist/src/install/fan-out.js +236 -0
- package/dist/src/install/fan-out.js.map +1 -0
- package/dist/src/install/index.d.ts +16 -0
- package/dist/src/install/index.d.ts.map +1 -0
- package/dist/src/install/index.js +14 -0
- package/dist/src/install/index.js.map +1 -0
- package/dist/src/install/paths.d.ts +16 -0
- package/dist/src/install/paths.d.ts.map +1 -0
- package/dist/src/install/paths.js +56 -0
- package/dist/src/install/paths.js.map +1 -0
- package/dist/src/repositories/AdvisoryRepository.test.js +2 -2
- package/dist/src/repositories/AdvisoryRepository.test.js.map +1 -1
- package/dist/src/repositories/SkillRepository.d.ts.map +1 -1
- package/dist/src/repositories/SkillRepository.js +12 -4
- package/dist/src/repositories/SkillRepository.js.map +1 -1
- package/dist/src/search/hybrid.js +2 -2
- package/dist/src/search/hybrid.js.map +1 -1
- package/dist/src/security/pathValidation.d.ts +6 -1
- package/dist/src/security/pathValidation.d.ts.map +1 -1
- package/dist/src/security/pathValidation.js +6 -1
- package/dist/src/security/pathValidation.js.map +1 -1
- package/dist/src/services/skill-installation.errors.d.ts +53 -0
- package/dist/src/services/skill-installation.errors.d.ts.map +1 -0
- package/dist/src/services/skill-installation.errors.js +63 -0
- package/dist/src/services/skill-installation.errors.js.map +1 -0
- package/dist/src/services/skill-installation.service.d.ts.map +1 -1
- package/dist/src/services/skill-installation.service.js +33 -35
- package/dist/src/services/skill-installation.service.js.map +1 -1
- package/dist/src/services/skill-installation.types.d.ts +16 -0
- package/dist/src/services/skill-installation.types.d.ts.map +1 -1
- package/dist/src/services/skill-installation.types.js.map +1 -1
- package/dist/src/skills/index-local.d.ts +107 -0
- package/dist/src/skills/index-local.d.ts.map +1 -0
- package/dist/src/skills/index-local.js +208 -0
- package/dist/src/skills/index-local.js.map +1 -0
- package/dist/src/sync/SyncEngine.d.ts.map +1 -1
- package/dist/src/sync/SyncEngine.js +20 -3
- package/dist/src/sync/SyncEngine.js.map +1 -1
- package/dist/src/types/skill.d.ts +20 -5
- package/dist/src/types/skill.d.ts.map +1 -1
- package/dist/src/types/skill.js.map +1 -1
- package/dist/tests/EmbeddingService.test.js +1 -1
- package/dist/tests/EmbeddingService.test.js.map +1 -1
- package/dist/tests/SkillVersionRepository.test.js +2 -2
- package/dist/tests/SkillVersionRepository.test.js.map +1 -1
- package/dist/tests/db/migration.test.js +13 -5
- package/dist/tests/db/migration.test.js.map +1 -1
- package/dist/tests/embeddings/hnsw-bench-gate.test.d.ts +15 -0
- package/dist/tests/embeddings/hnsw-bench-gate.test.d.ts.map +1 -0
- package/dist/tests/embeddings/hnsw-bench-gate.test.js +117 -0
- package/dist/tests/embeddings/hnsw-bench-gate.test.js.map +1 -0
- package/dist/tests/embeddings/hnsw-integration.test.d.ts +14 -0
- package/dist/tests/embeddings/hnsw-integration.test.d.ts.map +1 -0
- package/dist/tests/embeddings/hnsw-integration.test.js +160 -0
- package/dist/tests/embeddings/hnsw-integration.test.js.map +1 -0
- package/dist/tests/embeddings/hnsw-vs-brute-force.bench.d.ts +15 -0
- package/dist/tests/embeddings/hnsw-vs-brute-force.bench.d.ts.map +1 -0
- package/dist/tests/embeddings/hnsw-vs-brute-force.bench.js +64 -0
- package/dist/tests/embeddings/hnsw-vs-brute-force.bench.js.map +1 -0
- package/dist/tests/embeddings/seed-bench.d.ts +15 -0
- package/dist/tests/embeddings/seed-bench.d.ts.map +1 -0
- package/dist/tests/embeddings/seed-bench.js +61 -0
- package/dist/tests/embeddings/seed-bench.js.map +1 -0
- package/dist/tests/helpers/database.d.ts +11 -2
- package/dist/tests/helpers/database.d.ts.map +1 -1
- package/dist/tests/helpers/database.js +23 -7
- package/dist/tests/helpers/database.js.map +1 -1
- package/dist/tests/install/fan-out.test.d.ts +2 -0
- package/dist/tests/install/fan-out.test.d.ts.map +1 -0
- package/dist/tests/install/fan-out.test.js +238 -0
- package/dist/tests/install/fan-out.test.js.map +1 -0
- package/dist/tests/install/paths.test.d.ts +2 -0
- package/dist/tests/install/paths.test.d.ts.map +1 -0
- package/dist/tests/install/paths.test.js +79 -0
- package/dist/tests/install/paths.test.js.map +1 -0
- package/dist/tests/repositories/CoInstallRepository.test.js +2 -2
- package/dist/tests/repositories/CoInstallRepository.test.js.map +1 -1
- package/dist/tests/repositories/SkillDependencyRepository.test.js +2 -2
- package/dist/tests/repositories/SkillDependencyRepository.test.js.map +1 -1
- package/dist/tests/schema.test.js +12 -0
- package/dist/tests/schema.test.js.map +1 -1
- package/dist/tests/skill-scanner/allowlist.test.js +27 -4
- package/dist/tests/skill-scanner/allowlist.test.js.map +1 -1
- package/dist/tests/unit/audit/exclusions.test.d.ts +7 -0
- package/dist/tests/unit/audit/exclusions.test.d.ts.map +1 -0
- package/dist/tests/unit/audit/exclusions.test.js +157 -0
- package/dist/tests/unit/audit/exclusions.test.js.map +1 -0
- package/dist/tests/unit/config/audit-mode.test.d.ts +11 -0
- package/dist/tests/unit/config/audit-mode.test.d.ts.map +1 -0
- package/dist/tests/unit/config/audit-mode.test.js +86 -0
- package/dist/tests/unit/config/audit-mode.test.js.map +1 -0
- package/dist/tests/unit/migrations/migration-v16.test.d.ts +11 -0
- package/dist/tests/unit/migrations/migration-v16.test.d.ts.map +1 -0
- package/dist/tests/unit/migrations/migration-v16.test.js +139 -0
- package/dist/tests/unit/migrations/migration-v16.test.js.map +1 -0
- package/dist/tests/unit/migrations/migration-v17.test.d.ts +16 -0
- package/dist/tests/unit/migrations/migration-v17.test.d.ts.map +1 -0
- package/dist/tests/unit/migrations/migration-v17.test.js +256 -0
- package/dist/tests/unit/migrations/migration-v17.test.js.map +1 -0
- package/dist/tests/unit/migrations/v10-dependencies.test.js +2 -2
- package/dist/tests/unit/migrations/v10-dependencies.test.js.map +1 -1
- package/dist/tests/unit/services/skill-installation-extended.test.js +1 -1
- package/dist/tests/unit/services/skill-installation-extended.test.js.map +1 -1
- package/dist/tests/unit/services/skill-installation.service.test.js +113 -1
- package/dist/tests/unit/services/skill-installation.service.test.js.map +1 -1
- package/dist/tests/unit/skills/index-local.test.d.ts +11 -0
- package/dist/tests/unit/skills/index-local.test.d.ts.map +1 -0
- package/dist/tests/unit/skills/index-local.test.js +88 -0
- package/dist/tests/unit/skills/index-local.test.js.map +1 -0
- package/dist/tests/unit/sync-engine.source-aware.test.d.ts +11 -0
- package/dist/tests/unit/sync-engine.source-aware.test.d.ts.map +1 -0
- package/dist/tests/unit/sync-engine.source-aware.test.js +147 -0
- package/dist/tests/unit/sync-engine.source-aware.test.js.map +1 -0
- package/package.json +35 -12
package/dist/src/db/schema.js
CHANGED
|
@@ -22,7 +22,10 @@ import { runMigrations, runMigrationsSafe } from './migration-runner.js';
|
|
|
22
22
|
// v11: SMI-3510 content hash verification column
|
|
23
23
|
// v12: SMI-3864 risk score history for trend detection
|
|
24
24
|
// v13: SMI-3896 visibility and team_id for private skills
|
|
25
|
-
|
|
25
|
+
// v14, v15: reserved (RBAC, integrations)
|
|
26
|
+
// v16: SMI-4665 source column + extend trust_tier CHECK to allow 'local'
|
|
27
|
+
// v17: SMI-4917 widen trust_tier CHECK to allow 'curated'
|
|
28
|
+
export const SCHEMA_VERSION = 17;
|
|
26
29
|
/**
|
|
27
30
|
* Initialize the database with the complete schema.
|
|
28
31
|
*
|
|
@@ -75,9 +78,17 @@ export function openDatabase(path) {
|
|
|
75
78
|
return db;
|
|
76
79
|
}
|
|
77
80
|
/**
|
|
78
|
-
* Close the database connection safely
|
|
81
|
+
* Close the database connection safely.
|
|
82
|
+
*
|
|
83
|
+
* SMI-4640: Tolerate `undefined` so a failed `beforeEach` (e.g. native-module
|
|
84
|
+
* load error in `createTestDatabase`) is reported as the original error instead
|
|
85
|
+
* of being shadowed by a `Cannot read properties of undefined (reading 'close')`
|
|
86
|
+
* cascade in `afterEach`. The underlying create-side error already carries the
|
|
87
|
+
* actionable diagnostic (see `createDatabaseSync`).
|
|
79
88
|
*/
|
|
80
89
|
export function closeDatabase(db) {
|
|
90
|
+
if (db === undefined || db === null)
|
|
91
|
+
return;
|
|
81
92
|
db.close();
|
|
82
93
|
}
|
|
83
94
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../../src/db/schema.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,EACL,kBAAkB,EAClB,mBAAmB,IAAI,0BAA0B,GAClD,MAAM,qBAAqB,CAAA;AAE5B,0EAA0E;AAC1E,OAAO,EAAE,UAAU,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAA;AAChE,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAA;AAE5C,yEAAyE;AACzE,OAAO,EACL,UAAU,EACV,gBAAgB,EAChB,aAAa,EACb,iBAAiB,GAClB,MAAM,uBAAuB,CAAA;AAG9B,yFAAyF;AACzF,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAIxE,iDAAiD;AACjD,uDAAuD;AACvD,0DAA0D;AAC1D,MAAM,CAAC,MAAM,cAAc,GAAG,EAAE,CAAA;AAEhC;;;;;;;;;GASG;AACH,MAAM,UAAU,gBAAgB,CAAC,EAAgB;IAC/C,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;IACnB,EAAE,CAAC,OAAO,CAAC,2DAA2D,CAAC,CAAC,GAAG,EAAE,CAAA;IAC7E,aAAa,CAAC,EAAE,CAAC,CAAA;AACnB,CAAC;AAED,qFAAqF;AACrF,MAAM,UAAU,cAAc,CAAC,OAAe,UAAU;IACtD,MAAM,EAAE,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;IAEnC,sBAAsB;IACtB,EAAE,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAA;IAE9B,oBAAoB;IACpB,gBAAgB,CAAC,EAAE,CAAC,CAAA;IAEpB,OAAO,EAAE,CAAA;AACX,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,EAAE,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;IAEnC,sBAAsB;IACtB,EAAE,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAA;IAE9B,uCAAuC;IACvC,MAAM,gBAAgB,GAAG,EAAE;SACxB,OAAO,CAAC,6EAA6E,CAAC;SACtF,GAAG,EAAE,CAAA;IAER,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,6EAA6E;QAC7E,mDAAmD;QACnD,EAAE,CAAC,IAAI,CAAC;;;;;;KAMP,CAAC,CAAA;IACJ,CAAC;IAED,gCAAgC;IAChC,iBAAiB,CAAC,EAAE,CAAC,CAAA;IAErB,OAAO,EAAE,CAAA;AACX,CAAC;AAED
|
|
1
|
+
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../../src/db/schema.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,EACL,kBAAkB,EAClB,mBAAmB,IAAI,0BAA0B,GAClD,MAAM,qBAAqB,CAAA;AAE5B,0EAA0E;AAC1E,OAAO,EAAE,UAAU,EAAE,kBAAkB,EAAE,MAAM,iBAAiB,CAAA;AAChE,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAA;AAE5C,yEAAyE;AACzE,OAAO,EACL,UAAU,EACV,gBAAgB,EAChB,aAAa,EACb,iBAAiB,GAClB,MAAM,uBAAuB,CAAA;AAG9B,yFAAyF;AACzF,OAAO,EAAE,aAAa,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAIxE,iDAAiD;AACjD,uDAAuD;AACvD,0DAA0D;AAC1D,0CAA0C;AAC1C,yEAAyE;AACzE,0DAA0D;AAC1D,MAAM,CAAC,MAAM,cAAc,GAAG,EAAE,CAAA;AAEhC;;;;;;;;;GASG;AACH,MAAM,UAAU,gBAAgB,CAAC,EAAgB;IAC/C,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;IACnB,EAAE,CAAC,OAAO,CAAC,2DAA2D,CAAC,CAAC,GAAG,EAAE,CAAA;IAC7E,aAAa,CAAC,EAAE,CAAC,CAAA;AACnB,CAAC;AAED,qFAAqF;AACrF,MAAM,UAAU,cAAc,CAAC,OAAe,UAAU;IACtD,MAAM,EAAE,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;IAEnC,sBAAsB;IACtB,EAAE,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAA;IAE9B,oBAAoB;IACpB,gBAAgB,CAAC,EAAE,CAAC,CAAA;IAEpB,OAAO,EAAE,CAAA;AACX,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,MAAM,EAAE,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;IAEnC,sBAAsB;IACtB,EAAE,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAA;IAE9B,uCAAuC;IACvC,MAAM,gBAAgB,GAAG,EAAE;SACxB,OAAO,CAAC,6EAA6E,CAAC;SACtF,GAAG,EAAE,CAAA;IAER,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,6EAA6E;QAC7E,mDAAmD;QACnD,EAAE,CAAC,IAAI,CAAC;;;;;;KAMP,CAAC,CAAA;IACJ,CAAC;IAED,gCAAgC;IAChC,iBAAiB,CAAC,EAAE,CAAC,CAAA;IAErB,OAAO,EAAE,CAAA;AACX,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,aAAa,CAAC,EAAmC;IAC/D,IAAI,EAAE,KAAK,SAAS,IAAI,EAAE,KAAK,IAAI;QAAE,OAAM;IAC3C,EAAE,CAAC,KAAK,EAAE,CAAA;AACZ,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,OAAe,UAAU;IACjE,MAAM,EAAE,GAAG,MAAM,0BAA0B,CAAC,IAAI,CAAC,CAAA;IAEjD,sBAAsB;IACtB,EAAE,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAA;IAE9B,oBAAoB;IACpB,gBAAgB,CAAC,EAAE,CAAC,CAAA;IAEpB,OAAO,EAAE,CAAA;AACX,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,IAAY;IAClD,MAAM,EAAE,GAAG,MAAM,0BAA0B,CAAC,IAAI,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAA;IAE1E,sBAAsB;IACtB,EAAE
,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAA;IAE9B,uCAAuC;IACvC,MAAM,gBAAgB,GAAG,EAAE;SACxB,OAAO,CAAC,6EAA6E,CAAC;SACtF,GAAG,EAAE,CAAA;IAER,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,iEAAiE;QACjE,EAAE,CAAC,IAAI,CAAC;;;;;;KAMP,CAAC,CAAA;IACJ,CAAC;IAED,gCAAgC;IAChC,iBAAiB,CAAC,EAAE,CAAC,CAAA;IAErB,OAAO,EAAE,CAAA;AACX,CAAC"}
|
|
@@ -39,4 +39,27 @@ export declare function checkTransformersAvailability(): Promise<boolean>;
|
|
|
39
39
|
* Get the error that occurred when loading the transformers module, if any.
|
|
40
40
|
*/
|
|
41
41
|
export declare function getTransformersLoadError(): Error | null;
|
|
42
|
+
/**
|
|
43
|
+
* Cosine similarity between two vectors of equal length.
|
|
44
|
+
*
|
|
45
|
+
* SMI-4577: extracted from `EmbeddingService.cosineSimilarity` so the brute-
|
|
46
|
+
* force fallback in this module can call it directly without a class
|
|
47
|
+
* instance, and to keep `index.ts` under the 500-line cap.
|
|
48
|
+
*
|
|
49
|
+
* @throws if vectors have different lengths
|
|
50
|
+
* @returns 0 when either vector has zero magnitude
|
|
51
|
+
*/
|
|
52
|
+
export declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
|
|
53
|
+
/**
|
|
54
|
+
* Brute-force top-K nearest-neighbour search over an embedding map. Used as
|
|
55
|
+
* the deterministic fallback when HNSW is unavailable or explicitly disabled.
|
|
56
|
+
*
|
|
57
|
+
* SMI-4577: extracted from `EmbeddingService.findSimilarBruteForce` so the
|
|
58
|
+
* pure search algorithm doesn't need a class instance. The wrapper method on
|
|
59
|
+
* `EmbeddingService` still exists for backwards compatibility.
|
|
60
|
+
*/
|
|
61
|
+
export declare function findSimilarBruteForceFromMap(embeddings: Map<string, Float32Array>, queryEmbedding: Float32Array, topK: number): Array<{
|
|
62
|
+
skillId: string;
|
|
63
|
+
score: number;
|
|
64
|
+
}>;
|
|
42
65
|
//# sourceMappingURL=embedding-utils.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embedding-utils.d.ts","sourceRoot":"","sources":["../../../src/embeddings/embedding-utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,CAAC,EAAE,OAAO,GAAG,OAAO,CAW7D;AAED;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAQ7C;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,YAAY,CAuBnF;AAQD;;;GAGG;AACH,wBAAsB,sBAAsB,IAAI,OAAO,CACrD,cAAc,2BAA2B,CAAC,GAAG,IAAI,CAClD,CA2BA;AAED;;;;;GAKG;AACH,wBAAgB,uBAAuB,IAAI,OAAO,GAAG,SAAS,CAI7D;AAED;;;;;GAKG;AACH,wBAAsB,6BAA6B,IAAI,OAAO,CAAC,OAAO,CAAC,CAGtE;AAED;;GAEG;AACH,wBAAgB,wBAAwB,IAAI,KAAK,GAAG,IAAI,CAEvD"}
|
|
1
|
+
{"version":3,"file":"embedding-utils.d.ts","sourceRoot":"","sources":["../../../src/embeddings/embedding-utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,CAAC,EAAE,OAAO,GAAG,OAAO,CAW7D;AAED;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAQ7C;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,YAAY,CAuBnF;AAQD;;;GAGG;AACH,wBAAsB,sBAAsB,IAAI,OAAO,CACrD,cAAc,2BAA2B,CAAC,GAAG,IAAI,CAClD,CA2BA;AAED;;;;;GAKG;AACH,wBAAgB,uBAAuB,IAAI,OAAO,GAAG,SAAS,CAI7D;AAED;;;;;GAKG;AACH,wBAAsB,6BAA6B,IAAI,OAAO,CAAC,OAAO,CAAC,CAGtE;AAED;;GAEG;AACH,wBAAgB,wBAAwB,IAAI,KAAK,GAAG,IAAI,CAEvD;AAED;;;;;;;;;GASG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CAYzE;AAED;;;;;;;GAOG;AACH,wBAAgB,4BAA4B,CAC1C,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,EACrC,cAAc,EAAE,YAAY,EAC5B,IAAI,EAAE,MAAM,GACX,KAAK,CAAC;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAO3C"}
|
|
@@ -118,4 +118,43 @@ export async function checkTransformersAvailability() {
|
|
|
118
118
|
export function getTransformersLoadError() {
|
|
119
119
|
return pipelineLoadError;
|
|
120
120
|
}
|
|
121
|
+
/**
|
|
122
|
+
* Cosine similarity between two vectors of equal length.
|
|
123
|
+
*
|
|
124
|
+
* SMI-4577: extracted from `EmbeddingService.cosineSimilarity` so the brute-
|
|
125
|
+
* force fallback in this module can call it directly without a class
|
|
126
|
+
* instance, and to keep `index.ts` under the 500-line cap.
|
|
127
|
+
*
|
|
128
|
+
* @throws if vectors have different lengths
|
|
129
|
+
* @returns 0 when either vector has zero magnitude
|
|
130
|
+
*/
|
|
131
|
+
export function cosineSimilarity(a, b) {
|
|
132
|
+
if (a.length !== b.length)
|
|
133
|
+
throw new Error('Embeddings must have same dimension');
|
|
134
|
+
let dotProduct = 0, normA = 0, normB = 0;
|
|
135
|
+
for (let i = 0; i < a.length; i++) {
|
|
136
|
+
dotProduct += a[i] * b[i];
|
|
137
|
+
normA += a[i] * a[i];
|
|
138
|
+
normB += b[i] * b[i];
|
|
139
|
+
}
|
|
140
|
+
if (normA === 0 || normB === 0)
|
|
141
|
+
return 0;
|
|
142
|
+
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
|
143
|
+
}
|
|
144
|
+
/**
|
|
145
|
+
* Brute-force top-K nearest-neighbour search over an embedding map. Used as
|
|
146
|
+
* the deterministic fallback when HNSW is unavailable or explicitly disabled.
|
|
147
|
+
*
|
|
148
|
+
* SMI-4577: extracted from `EmbeddingService.findSimilarBruteForce` so the
|
|
149
|
+
* pure search algorithm doesn't need a class instance. The wrapper method on
|
|
150
|
+
* `EmbeddingService` still exists for backwards compatibility.
|
|
151
|
+
*/
|
|
152
|
+
export function findSimilarBruteForceFromMap(embeddings, queryEmbedding, topK) {
|
|
153
|
+
const results = [];
|
|
154
|
+
for (const [skillId, embedding] of embeddings) {
|
|
155
|
+
results.push({ skillId, score: cosineSimilarity(queryEmbedding, embedding) });
|
|
156
|
+
}
|
|
157
|
+
results.sort((a, b) => b.score - a.score);
|
|
158
|
+
return results.slice(0, topK);
|
|
159
|
+
}
|
|
121
160
|
//# sourceMappingURL=embedding-utils.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embedding-utils.js","sourceRoot":"","sources":["../../../src/embeddings/embedding-utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAkB;IAClD,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;QAC3B,OAAO,QAAQ,CAAA;IACjB,CAAC;IACD,6BAA6B;IAC7B,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAA;IAC3D,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;QAC3B,OAAO,QAAQ,KAAK,MAAM,IAAI,QAAQ,KAAK,GAAG,CAAA;IAChD,CAAC;IACD,6BAA6B;IAC7B,OAAO,KAAK,CAAA;AACd,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAY;IACnC,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;QAC/B,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAA;QAChC,IAAI,GAAG,IAAI,GAAG,IAAI,CAAA,CAAC,4BAA4B;IACjD,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAY,EAAE,SAAiB;IACnE,MAAM,SAAS,GAAG,IAAI,YAAY,CAAC,SAAS,CAAC,CAAA;IAC7C,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAA;IAE/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,kFAAkF;QAClF,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,GAAG,GAAG,CAAA;QACtD,SAAS,CAAC,CAAC,CAAC,GAAG,KAAK,CAAA;IACtB,CAAC;IAED,uBAAuB;IACvB,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,IAAI,IAAI,SAAS,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAA;IACrC,CAAC;IACD,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACtB,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,SAAS,CAAC,CAAC,CAAC,IAAI,IAAI,CAAA;QACtB,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAA;AAClB,CAAC;AAED,gFAAgF;AAChF,IAAI,cAAc,GAAsD,IAAI,CAAA;AAC5E,IAAI,mBAAmB,GAA+D,IAAI,CAAA;AAC1F,IAAI,kBAAkB,GAAG,KAAK,CAAA;AAC9B,IAAI,iBAAiB,GAAiB,IAAI,CAAA;AAE1C;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB;IAG1C,yCAAyC;IACzC,IAAI,cAAc,EAAE,CAAC;QACnB,OAAO,cAAc,CAAA;IACvB,CAAC;IAED,6CAA6C;IAC7C,IAAI,kBAAkB,EAAE,CAAC;QACvB,OAAO,IAAI,CAAA;IACb
,CAAC;IAED,2CAA2C;IAC3C,IAAI,CAAC,mBAAmB,EAAE,CAAC;QACzB,mBAAmB,GAAG,MAAM,CAAC,2BAA2B,CAAC;aACtD,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;YACZ,cAAc,GAAG,GAAG,CAAA;YACpB,OAAO,GAAG,CAAA;QACZ,CAAC,CAAC;aACD,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;YACb,kBAAkB,GAAG,IAAI,CAAA;YACzB,iBAAiB,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAA;YACvE,OAAO,IAA6D,CAAA;QACtE,CAAC,CAAC,CAAA;IACN,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,mBAAmB,CAAA;IACxC,OAAO,MAAM,IAAI,IAAI,CAAA;AACvB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,uBAAuB;IACrC,IAAI,cAAc;QAAE,OAAO,IAAI,CAAA;IAC/B,IAAI,kBAAkB;QAAE,OAAO,KAAK,CAAA;IACpC,OAAO,SAAS,CAAA;AAClB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,6BAA6B;IACjD,MAAM,GAAG,GAAG,MAAM,sBAAsB,EAAE,CAAA;IAC1C,OAAO,GAAG,KAAK,IAAI,CAAA;AACrB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,wBAAwB;IACtC,OAAO,iBAAiB,CAAA;AAC1B,CAAC"}
|
|
1
|
+
{"version":3,"file":"embedding-utils.js","sourceRoot":"","sources":["../../../src/embeddings/embedding-utils.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAkB;IAClD,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;QAC3B,OAAO,QAAQ,CAAA;IACjB,CAAC;IACD,6BAA6B;IAC7B,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAA;IAC3D,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;QAC3B,OAAO,QAAQ,KAAK,MAAM,IAAI,QAAQ,KAAK,GAAG,CAAA;IAChD,CAAC;IACD,6BAA6B;IAC7B,OAAO,KAAK,CAAA;AACd,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,QAAQ,CAAC,IAAY;IACnC,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;QAC/B,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAA;QAChC,IAAI,GAAG,IAAI,GAAG,IAAI,CAAA,CAAC,4BAA4B;IACjD,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAY,EAAE,SAAiB;IACnE,MAAM,SAAS,GAAG,IAAI,YAAY,CAAC,SAAS,CAAC,CAAA;IAC7C,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAA;IAE/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,kFAAkF;QAClF,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,GAAG,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,GAAG,GAAG,CAAA;QACtD,SAAS,CAAC,CAAC,CAAC,GAAG,KAAK,CAAA;IACtB,CAAC;IAED,uBAAuB;IACvB,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,IAAI,IAAI,SAAS,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAA;IACrC,CAAC;IACD,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACtB,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,SAAS,CAAC,CAAC,CAAC,IAAI,IAAI,CAAA;QACtB,CAAC;IACH,CAAC;IAED,OAAO,SAAS,CAAA;AAClB,CAAC;AAED,gFAAgF;AAChF,IAAI,cAAc,GAAsD,IAAI,CAAA;AAC5E,IAAI,mBAAmB,GAA+D,IAAI,CAAA;AAC1F,IAAI,kBAAkB,GAAG,KAAK,CAAA;AAC9B,IAAI,iBAAiB,GAAiB,IAAI,CAAA;AAE1C;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB;IAG1C,yCAAyC;IACzC,IAAI,cAAc,EAAE,CAAC;QACnB,OAAO,cAAc,CAAA;IACvB,CAAC;IAED,6CAA6C;IAC7C,IAAI,kBAAkB,EAAE,CAAC;QACvB,OAAO,IAAI,CAAA;IACb
,CAAC;IAED,2CAA2C;IAC3C,IAAI,CAAC,mBAAmB,EAAE,CAAC;QACzB,mBAAmB,GAAG,MAAM,CAAC,2BAA2B,CAAC;aACtD,IAAI,CAAC,CAAC,GAAG,EAAE,EAAE;YACZ,cAAc,GAAG,GAAG,CAAA;YACpB,OAAO,GAAG,CAAA;QACZ,CAAC,CAAC;aACD,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;YACb,kBAAkB,GAAG,IAAI,CAAA;YACzB,iBAAiB,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAA;YACvE,OAAO,IAA6D,CAAA;QACtE,CAAC,CAAC,CAAA;IACN,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,mBAAmB,CAAA;IACxC,OAAO,MAAM,IAAI,IAAI,CAAA;AACvB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,uBAAuB;IACrC,IAAI,cAAc;QAAE,OAAO,IAAI,CAAA;IAC/B,IAAI,kBAAkB;QAAE,OAAO,KAAK,CAAA;IACpC,OAAO,SAAS,CAAA;AAClB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,6BAA6B;IACjD,MAAM,GAAG,GAAG,MAAM,sBAAsB,EAAE,CAAA;IAC1C,OAAO,GAAG,KAAK,IAAI,CAAA;AACrB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,wBAAwB;IACtC,OAAO,iBAAiB,CAAA;AAC1B,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,gBAAgB,CAAC,CAAe,EAAE,CAAe;IAC/D,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM;QAAE,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAA;IACjF,IAAI,UAAU,GAAG,CAAC,EAChB,KAAK,GAAG,CAAC,EACT,KAAK,GAAG,CAAC,CAAA;IACX,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;QACzB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;QACpB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;IACtB,CAAC;IACD,IAAI,KAAK,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAA;IACxC,OAAO,UAAU,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAA;AAC3D,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,4BAA4B,CAC1C,UAAqC,EACrC,cAA4B,EAC5B,IAAY;IAEZ,MAAM,OAAO,GAA8C,EAAE,CAAA;IAC7D,KAAK,MAAM,CAAC,OAAO,EAAE,SAAS,CAAC,IAAI,UAAU,EAAE,CAAC;QAC9C,OAAO,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,KAAK,EAAE,gBAAgB,CAAC,cAAc,EAAE,SAAS,CAAC,EAAE,CAAC,CAAA;IAC/E,CAAC;IACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;IACzC,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;AAC/B,CAAC"}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SMI-4577: HNSW search backend for `EmbeddingService.findSimilar`.
|
|
3
|
+
*
|
|
4
|
+
* Lazily loads `hnswlib-node` (declared in the `optionalDependencies` of
|
|
5
|
+
* @skillsmith/core), builds or loads an on-disk index at
|
|
6
|
+
* `~/.skillsmith/cache/hnsw-{modelName}.{bin,meta.json,labels.json}`,
|
|
7
|
+
* and exposes incremental `addPoint`/`markDelete` semantics with a debounced
|
|
8
|
+
* atomic-rename persist.
|
|
9
|
+
*
|
|
10
|
+
* Failure modes:
|
|
11
|
+
* - `MODULE_NOT_FOUND` on import → permanently disable; brute-force fallback
|
|
12
|
+
* in `EmbeddingService.findSimilar` covers the case.
|
|
13
|
+
* - `readIndex` failure on a corrupt cache → delete + rebuild on next call
|
|
14
|
+
* (treat as transient).
|
|
15
|
+
* - Concurrent writers → atomic-rename (`writeIndex` to `.tmp`,
|
|
16
|
+
* `fs.renameSync` to final). Loser-of-race acceptable; readers re-read
|
|
17
|
+
* via the atomic pointer.
|
|
18
|
+
*
|
|
19
|
+
* @see ADR-009 (2026-05 amendment): brute-force fallback retained for
|
|
20
|
+
* environments where the optional dep failed to install.
|
|
21
|
+
*/
|
|
22
|
+
import type { HierarchicalNSW } from './hnsw-store.types.js';
|
|
23
|
+
/**
|
|
24
|
+
* Persisted metadata describing the on-disk HNSW index. Used to invalidate the
|
|
25
|
+
* cached graph when the embedding count, model, or vector dimension changes.
|
|
26
|
+
*/
|
|
27
|
+
export interface HnswMeta {
|
|
28
|
+
/** Schema version. Bump on incompatible meta changes. */
|
|
29
|
+
version: 1;
|
|
30
|
+
/** Model identifier (e.g. `Xenova/all-MiniLM-L6-v2`). */
|
|
31
|
+
modelName: string;
|
|
32
|
+
/** Vector dimensionality. */
|
|
33
|
+
dim: number;
|
|
34
|
+
/** Number of points the cache was built from. */
|
|
35
|
+
count: number;
|
|
36
|
+
/** ISO timestamp the cache was last persisted. */
|
|
37
|
+
builtAt: string;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Wrapper exposing the live HNSW index plus the bookkeeping needed by
|
|
41
|
+
* `EmbeddingService` to rewire incremental upserts/removes.
|
|
42
|
+
*/
|
|
43
|
+
export interface HnswHandle {
|
|
44
|
+
/** The live HNSW index. */
|
|
45
|
+
index: HierarchicalNSW;
|
|
46
|
+
/** label → skillId mapping (HNSW returns numeric labels). */
|
|
47
|
+
labelToId: Map<number, string>;
|
|
48
|
+
/** skillId → label mapping (for incremental updates / deletes). */
|
|
49
|
+
idToLabel: Map<string, number>;
|
|
50
|
+
/** Next label to assign for new points. */
|
|
51
|
+
nextLabel: number;
|
|
52
|
+
/** Filesystem paths the index will read/write. Exposed for diagnostics/tests. */
|
|
53
|
+
paths: HnswCachePaths;
|
|
54
|
+
/** Schedule a debounced persist (5s). Safe to call repeatedly. */
|
|
55
|
+
schedulePersist: () => void;
|
|
56
|
+
/** Persist immediately (used at shutdown / for tests). */
|
|
57
|
+
persistNow: () => void;
|
|
58
|
+
}
|
|
59
|
+
/**
* Filesystem locations of the persisted index artefacts for one model.
* Each `*Tmp` path is the staging file that is written first and then
* renamed over its final counterpart.
*/
export interface HnswCachePaths {
|
|
60
|
+
/** Serialized HNSW graph (`hnsw-{model}.bin`). */
bin: string;
|
|
61
|
+
/** `HnswMeta` JSON (`hnsw-{model}.meta.json`). */
meta: string;
|
|
62
|
+
/** JSON array of `[label, skillId]` pairs (`hnsw-{model}.labels.json`). */
labels: string;
|
|
63
|
+
/** Staging path for `bin` (`….bin.tmp`). */
binTmp: string;
|
|
64
|
+
/** Staging path for `meta` (`….meta.json.tmp`). */
metaTmp: string;
|
|
65
|
+
/** Staging path for `labels` (`….labels.json.tmp`). */
labelsTmp: string;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Status reported back to `EmbeddingService` so it can distinguish
|
|
69
|
+
* "the optional dep is missing" (permanent) from "we hit a transient
|
|
70
|
+
* write error" (try again next time).
* Discriminate on `kind`; `reason` is a human-readable diagnostic string.
|
|
71
|
+
*/
|
|
72
|
+
export type HnswStatus = {
|
|
73
|
+
/** The index was loaded or built and `handle` is ready to use. */
kind: 'ok';
|
|
74
|
+
handle: HnswHandle;
|
|
75
|
+
} | {
|
|
76
|
+
/** `hnswlib-node` could not be loaded; the caller should stop trying. */
kind: 'permanently-unavailable';
|
|
77
|
+
reason: string;
|
|
78
|
+
} | {
|
|
79
|
+
/** Persisting a fresh build failed, or rebuilding after a corrupt cache failed. */
kind: 'temporarily-unavailable';
|
|
80
|
+
reason: string;
|
|
81
|
+
};
|
|
82
|
+
/**
|
|
83
|
+
* Build (or load from cache) an HNSW index for the supplied embedding map.
|
|
84
|
+
*
|
|
85
|
+
* `embeddings` is the canonical source of truth (from `EmbeddingService`'s
|
|
86
|
+
* SQLite cache). On a cold start with a populated cache file matching
|
|
87
|
+
* meta.json, we `readIndex` and skip the rebuild. Otherwise we initialise
|
|
88
|
+
* a fresh index and add every point — same I/O cost as a brute-force seed
|
|
89
|
+
* but the resulting graph survives subsequent `findSimilar` calls.
*
* After a fresh build `persistNow` is invoked; a failure there is reported
* as `temporarily-unavailable` rather than thrown.
*
* @returns `ok` with a ready handle, `permanently-unavailable` when
* `hnswlib-node` cannot be loaded, or `temporarily-unavailable` when
* persisting or rebuilding the cache failed.
|
|
90
|
+
*/
|
|
91
|
+
export declare function loadOrBuildHnsw(args: {
|
|
92
|
+
/** skillId → embedding vector; every entry is inserted on a fresh build. */
embeddings: Map<string, Float32Array>;
|
|
93
|
+
/** Model identifier; selects the cache files and must match the cached meta. */
modelName: string;
|
|
94
|
+
/** Vector dimensionality the index is created with. */
dim: number;
|
|
95
|
+
/** Capacity hint. Defaults to ~2x current size, clamped to 1024 minimum. */
|
|
96
|
+
maxElements?: number;
|
|
97
|
+
/** Override hyperparams. Defaults are m=32, efConstruction=400, efSearch=200 (described in the implementation as the `large` preset from hnsw-store.types.ts). */
|
|
98
|
+
m?: number;
|
|
99
|
+
efConstruction?: number;
|
|
100
|
+
efSearch?: number;
|
|
101
|
+
}): Promise<HnswStatus>;
|
|
102
|
+
/**
|
|
103
|
+
* Top-K nearest-neighbour search via the supplied handle.
* Returns an empty array when the handle holds no live points; otherwise at
* most `min(topK, live point count)` rows.
|
|
104
|
+
*
|
|
105
|
+
* @param handle - HNSW handle returned by `loadOrBuildHnsw`
|
|
106
|
+
* @param query - Query vector (must have the dimensionality the index was built with)
|
|
107
|
+
* @param topK - Maximum neighbours to return
|
|
108
|
+
* @returns Result rows in HNSW score order; `score` is `1 - cosineDistance`.
|
|
109
|
+
*/
|
|
110
|
+
export declare function findSimilarHnsw(handle: HnswHandle, query: Float32Array, topK: number): Array<{
|
|
111
|
+
skillId: string;
|
|
112
|
+
score: number;
|
|
113
|
+
}>;
|
|
114
|
+
/**
|
|
115
|
+
* Add or replace a point. Used by `EmbeddingService.storeEmbedding` to keep
|
|
116
|
+
* the in-memory graph aligned with the SQLite cache. Marks the handle dirty;
|
|
117
|
+
* persist happens via the debounced 5s timer (or `persistNow`).
* Replacing an existing skill marks its old point deleted and inserts the
* new vector under a fresh label, so `handle.nextLabel` advances on every call.
|
|
118
|
+
*/
|
|
119
|
+
export declare function upsertPoint(handle: HnswHandle, skillId: string, vector: Float32Array): void;
|
|
120
|
+
/**
|
|
121
|
+
* Mark a point deleted. The point stays in the graph for traversal correctness
|
|
122
|
+
* but `findSimilarHnsw` filters it out via the labelToId lookup.
* Schedules a debounced persist when something was removed.
*
* @returns `true` if the skill was indexed and has been removed, `false` if
* the skill id was unknown (nothing is changed in that case).
|
|
123
|
+
*/
|
|
124
|
+
export declare function removePoint(handle: HnswHandle, skillId: string): boolean;
|
|
125
|
+
/**
|
|
126
|
+
* Test-only helper — clears the cached `hnswlib-node` constructor reference so
|
|
127
|
+
* tests can simulate "module reinstalled" scenarios. Not part of the public
|
|
128
|
+
* API; do not use in production code.
* A cached "unavailable" result is cleared as well, so the next
* `loadOrBuildHnsw` call attempts the import again.
|
|
129
|
+
*
|
|
130
|
+
* @internal
|
|
131
|
+
*/
|
|
132
|
+
export declare function __resetCachedHnswCtorForTests(): void;
|
|
133
|
+
//# sourceMappingURL=hnsw-search.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hnsw-search.d.ts","sourceRoot":"","sources":["../../../src/embeddings/hnsw-search.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAKH,OAAO,KAAK,EAAE,eAAe,EAA8B,MAAM,uBAAuB,CAAA;AAExF;;;GAGG;AACH,MAAM,WAAW,QAAQ;IACvB,yDAAyD;IACzD,OAAO,EAAE,CAAC,CAAA;IACV,yDAAyD;IACzD,SAAS,EAAE,MAAM,CAAA;IACjB,6BAA6B;IAC7B,GAAG,EAAE,MAAM,CAAA;IACX,iDAAiD;IACjD,KAAK,EAAE,MAAM,CAAA;IACb,kDAAkD;IAClD,OAAO,EAAE,MAAM,CAAA;CAChB;AAED;;;GAGG;AACH,MAAM,WAAW,UAAU;IACzB,2BAA2B;IAC3B,KAAK,EAAE,eAAe,CAAA;IACtB,6DAA6D;IAC7D,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC9B,mEAAmE;IACnE,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC9B,2CAA2C;IAC3C,SAAS,EAAE,MAAM,CAAA;IACjB,iFAAiF;IACjF,KAAK,EAAE,cAAc,CAAA;IACrB,kEAAkE;IAClE,eAAe,EAAE,MAAM,IAAI,CAAA;IAC3B,0DAA0D;IAC1D,UAAU,EAAE,MAAM,IAAI,CAAA;CACvB;AAED,MAAM,WAAW,cAAc;IAC7B,GAAG,EAAE,MAAM,CAAA;IACX,IAAI,EAAE,MAAM,CAAA;IACZ,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,EAAE,MAAM,CAAA;IACf,SAAS,EAAE,MAAM,CAAA;CAClB;AAED;;;;GAIG;AACH,MAAM,MAAM,UAAU,GAClB;IAAE,IAAI,EAAE,IAAI,CAAC;IAAC,MAAM,EAAE,UAAU,CAAA;CAAE,GAClC;IAAE,IAAI,EAAE,yBAAyB,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACnD;IAAE,IAAI,EAAE,yBAAyB,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAA;AA8FvD;;;;;;;;GAQG;AACH,wBAAsB,eAAe,CAAC,IAAI,EAAE;IAC1C,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,CAAA;IACrC,SAAS,EAAE,MAAM,CAAA;IACjB,GAAG,EAAE,MAAM,CAAA;IACX,4EAA4E;IAC5E,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,yFAAyF;IACzF,CAAC,CAAC,EAAE,MAAM,CAAA;IACV,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAA;CAClB,GAAG,OAAO,CAAC,UAAU,CAAC,CA6GtB;AAgGD;;;;;;;GAOG;AACH,wBAAgB,eAAe,CAC7B,MAAM,EAAE,UAAU,EAClB,KAAK,EAAE,YAAY,EACnB,IAAI,EAAE,MAAM,GACX,KAAK,CAAC;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAc3C;AAED;;;;GAIG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,GAAG,IAAI,CAgB3F;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,MAAM,GAAG,OAAO,CAQxE;AAED;;;;;;GAMG;AACH,wBAAgB,6BAA6B,IAAI,IAAI,CAEpD"}
|
|
@@ -0,0 +1,387 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* SMI-4577: HNSW search backend for `EmbeddingService.findSimilar`.
|
|
3
|
+
*
|
|
4
|
+
* Lazily loads `hnswlib-node` (declared as `optionalDependencies` on
|
|
5
|
+
* @skillsmith/core), builds or loads an on-disk index at
|
|
6
|
+
* `~/.skillsmith/cache/hnsw-{modelName}.{bin,meta.json,labels.json}`,
|
|
7
|
+
* and exposes incremental `addPoint`/`markDelete` semantics with a debounced
|
|
8
|
+
* atomic-rename persist.
|
|
9
|
+
*
|
|
10
|
+
* Failure modes:
|
|
11
|
+
* - `MODULE_NOT_FOUND` on import → permanently disable; brute-force fallback
|
|
12
|
+
* in `EmbeddingService.findSimilar` covers the case.
|
|
13
|
+
* - `readIndexSync` failure on a corrupt cache → delete + rebuild within the same call
|
|
14
|
+
* (treat as transient).
|
|
15
|
+
* - Concurrent writers → atomic-rename (`writeIndex` to `.tmp`,
|
|
16
|
+
* `fs.renameSync` to final). Loser-of-race acceptable; readers re-read
|
|
17
|
+
* via the atomic pointer.
|
|
18
|
+
*
|
|
19
|
+
* @see ADR-009 (2026-05 amendment): brute-force fallback retained for
|
|
20
|
+
* environments where the optional dep failed to install.
|
|
21
|
+
*/
|
|
22
|
+
import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync, writeFileSync } from 'fs';
|
|
23
|
+
import { dirname, join } from 'path';
|
|
24
|
+
import { getCacheDir } from '../config/index.js';
|
|
25
|
+
/**
 * Memoised outcome of the `hnswlib-node` import: `null` until the first
 * attempt, the `HierarchicalNSW` constructor once it has been resolved, or the
 * string `'unavailable'` after an attempt failed.
 */
let cachedCtor = null;
/**
 * Resolve the `HierarchicalNSW` constructor from the optional `hnswlib-node`
 * dependency, importing the module at most once per process.
 *
 * The import is a literal dynamic `import()`. The module's exports may be
 * surfaced either at the top level or under `.default`, so both shapes are
 * probed. Any import failure, or a module without a `HierarchicalNSW` export,
 * is remembered as `'unavailable'`, so later calls return `null` without
 * retrying.
 *
 * @returns The constructor, or `null` when the dependency cannot be used.
 */
async function loadHnswCtor() {
    // A previous attempt already settled the answer one way or the other.
    if (cachedCtor !== null) {
        return cachedCtor === 'unavailable' ? null : cachedCtor;
    }
    let resolved;
    try {
        const hnswModule = await import('hnswlib-node');
        resolved = hnswModule.HierarchicalNSW ?? hnswModule.default?.HierarchicalNSW;
    }
    catch {
        // Import failed: treated exactly like a module without the export.
        resolved = undefined;
    }
    cachedCtor = resolved ? resolved : 'unavailable';
    return resolved ? resolved : null;
}
|
|
71
|
+
/**
 * Derive the cache file locations for a model.
 *
 * Forward and back slashes in the model name are replaced with `__` so the
 * name cannot introduce nested directories below the cache directory.
 *
 * @param modelName - Model identifier, e.g. `Xenova/all-MiniLM-L6-v2`.
 * @returns Final and `.tmp` staging paths for the index, meta and labels files.
 */
function cachePaths(modelName) {
    const stem = join(getCacheDir(), `hnsw-${modelName.replace(/[/\\]/g, '__')}`);
    const withSuffix = (suffix) => `${stem}${suffix}`;
    return {
        bin: withSuffix('.bin'),
        meta: withSuffix('.meta.json'),
        labels: withSuffix('.labels.json'),
        binTmp: withSuffix('.bin.tmp'),
        metaTmp: withSuffix('.meta.json.tmp'),
        labelsTmp: withSuffix('.labels.json.tmp'),
    };
}
|
|
86
|
+
/**
 * Read and validate the persisted index metadata.
 *
 * The cache is disposable, so every failure mode collapses to "no usable
 * metadata" and the caller rebuilds the index.
 *
 * @param metaPath - Path of the `*.meta.json` file.
 * @returns The parsed metadata, or `null` when the file is missing,
 *   unreadable, not valid JSON, or not a complete version-1 metadata object.
 */
function readMeta(metaPath) {
    let parsed;
    try {
        // Reading directly (no separate existence probe) avoids a window in
        // which the file could disappear between the check and the read.
        parsed = JSON.parse(readFileSync(metaPath, 'utf-8'));
    }
    catch {
        return null;
    }
    // Only hand back objects that really carry the fields the reuse check
    // compares; JSON such as `null`, `[]` or `{"version":1}` is rejected.
    const wellFormed = parsed !== null &&
        typeof parsed === 'object' &&
        parsed.version === 1 &&
        typeof parsed.modelName === 'string' &&
        Number.isInteger(parsed.dim) &&
        Number.isInteger(parsed.count);
    return wellFormed ? parsed : null;
}
|
|
99
|
+
/**
 * Read and validate the persisted label table.
 *
 * @param labelsPath - Path of the `*.labels.json` file.
 * @returns An array of `[label, skillId]` pairs, or `null` when the file is
 *   missing, unreadable, not valid JSON, or contains anything other than
 *   `[non-negative integer, string]` pairs.
 */
function readLabels(labelsPath) {
    let parsed;
    try {
        parsed = JSON.parse(readFileSync(labelsPath, 'utf-8'));
    }
    catch {
        // Missing or corrupt file: the caller falls back to a fresh build.
        return null;
    }
    if (!Array.isArray(parsed))
        return null;
    // Reject malformed entries here so they surface as "no cache" instead of
    // blowing up later when the pairs are fed to `new Map(...)`.
    const isPair = (entry) => Array.isArray(entry) &&
        entry.length === 2 &&
        Number.isInteger(entry[0]) &&
        entry[0] >= 0 &&
        typeof entry[1] === 'string';
    return parsed.every(isPair) ? parsed : null;
}
|
|
112
|
+
/**
 * Write `contents` to `final` by staging it in `tmp` and renaming it into
 * place, so readers never observe a partially written file.
 *
 * @param tmp - Staging path; its parent directory is created if needed.
 * @param final - Destination path that the staging file is renamed over.
 * @param contents - String (written as UTF-8) or binary data.
 * @throws Re-throws any filesystem error after removing the staging file.
 */
function writeAtomic(tmp, final, contents) {
    mkdirSync(dirname(tmp), { recursive: true });
    try {
        writeFileSync(tmp, contents, typeof contents === 'string' ? { encoding: 'utf-8' } : undefined);
        renameSync(tmp, final);
    }
    catch (err) {
        // Don't leave a stray staging file behind when the write or the
        // rename fails; the original error is what the caller needs to see.
        try {
            unlinkSync(tmp);
        }
        catch {
            /* staging file was never created or is already gone */
        }
        throw err;
    }
}
|
|
117
|
+
/**
 * Build (or load from cache) an HNSW index for the supplied embedding map.
 *
 * `args.embeddings` is the canonical source of truth. When the on-disk
 * artefacts exist and their metadata matches the requested model name,
 * dimension and embedding count, the index is read from disk. Otherwise (or
 * when reading the cached index fails) a fresh index is built from every
 * embedding and persisted straight away.
 *
 * @param args.embeddings - skillId → vector map to index.
 * @param args.modelName - Model identifier; selects the cache files.
 * @param args.dim - Vector dimensionality.
 * @param args.maxElements - Optional capacity hint; never applied below the
 *   number of embeddings or below 1024.
 * @param args.m - HNSW `M` (default 32).
 * @param args.efConstruction - Build-time `ef` (default 400).
 * @param args.efSearch - Query-time `ef` (default 200).
 * @returns `ok` with a handle; `permanently-unavailable` when `hnswlib-node`
 *   cannot be loaded; `temporarily-unavailable` when persisting the fresh
 *   build failed or the rebuild after a corrupt cache failed.
 * @throws Propagates errors from a fresh build that was not preceded by a
 *   corrupt-cache load (e.g. a vector of the wrong dimension).
 */
export async function loadOrBuildHnsw(args) {
    const Ctor = await loadHnswCtor();
    if (!Ctor) {
        return {
            kind: 'permanently-unavailable',
            reason: 'hnswlib-node not installed (optionalDependencies)',
        };
    }
    const paths = cachePaths(args.modelName);
    const meta = readMeta(paths.meta);
    const labels = readLabels(paths.labels);
    const count = args.embeddings.size;
    // R2 mitigation: defaults tuned to clear the recall@10 ≥ 0.95 gate.
    // m=32/efConstruction=400/efSearch=200 is the `large` preset from
    // hnsw-store.types.ts.
    const m = args.m ?? 32;
    const efConstruction = args.efConstruction ?? 400;
    const efSearch = args.efSearch ?? 200;
    // The hint is only a hint: never allocate fewer slots than there are
    // points to insert, otherwise the build below would overflow the index.
    const capacity = Math.max(args.maxElements ?? count * 2, count, 1024);
    const reusable = meta !== null &&
        labels !== null &&
        meta.modelName === args.modelName &&
        meta.dim === args.dim &&
        meta.count === count &&
        existsSync(paths.bin);
    let restored = null;
    let cacheWasCorrupt = false;
    let loadError;
    if (reusable) {
        try {
            restored = restoreIndexFromCache(Ctor, args.dim, efSearch, paths.bin, labels);
        }
        catch (err) {
            // Corrupt cache: drop the artefacts (best effort) and fall through
            // to a fresh build. Falling through rather than re-entering this
            // function guarantees termination even when the files cannot be
            // deleted.
            cacheWasCorrupt = true;
            loadError = err;
            discardCacheArtefacts(paths);
        }
    }
    let parts = restored;
    if (parts === null) {
        try {
            parts = buildIndexFromEmbeddings(Ctor, args.dim, args.embeddings, {
                capacity,
                m,
                efConstruction,
                efSearch,
            });
        }
        catch (buildErr) {
            if (!cacheWasCorrupt)
                throw buildErr;
            return {
                kind: 'temporarily-unavailable',
                reason: `cache rebuild failed after corrupt-load: ${String(buildErr)} (initial: ${String(loadError)})`,
            };
        }
    }
    const handle = createHandle({
        index: parts.index,
        labelToId: parts.labelToId,
        idToLabel: parts.idToLabel,
        nextLabel: parts.nextLabel,
        dim: args.dim,
        modelName: args.modelName,
        paths,
    });
    // Immediate persist after a fresh build. The reusable path skips this —
    // the on-disk artefacts already match.
    if (restored === null) {
        try {
            // `persistNow` skips the write while the handle is clean and
            // bin/meta files exist. Stale artefacts from an older build would
            // therefore never be replaced, so mark the handle dirty first
            // (`persistNow` cancels the timer this starts).
            handle.schedulePersist();
            handle.persistNow();
        }
        catch (err) {
            return {
                kind: 'temporarily-unavailable',
                reason: `persist after build failed: ${String(err)}`,
            };
        }
    }
    return { kind: 'ok', handle };
}
/** Read the serialized index and rebuild the label bookkeeping from `labels`. */
function restoreIndexFromCache(Ctor, dim, efSearch, binPath, labels) {
    const index = new Ctor('cosine', dim);
    // Sync read keeps the boot path simple; the file is small at expected scale.
    index.readIndexSync(binPath, true);
    index.setEf(efSearch);
    const labelToId = new Map(labels);
    const idToLabel = new Map(labels.map(([label, id]) => [id, label]));
    const highestLive = labels.reduce((max, [label]) => Math.max(max, label), -1);
    // `labels` lists live points only, but the index file also holds
    // mark-deleted points. Labels are handed out sequentially and never
    // reused, so the stored element count is a lower bound for the next free
    // label; reusing a deleted label would make a later `addPoint` fail.
    const storedCount = typeof index.getCurrentCount === 'function' ? index.getCurrentCount() : 0;
    return {
        index,
        labelToId,
        idToLabel,
        nextLabel: Math.max(highestLive + 1, storedCount),
    };
}
/** Create an empty index and insert every embedding under sequential labels. */
function buildIndexFromEmbeddings(Ctor, dim, embeddings, config) {
    const index = new Ctor('cosine', dim);
    index.initIndex(config.capacity, config.m, config.efConstruction);
    index.setEf(config.efSearch);
    const labelToId = new Map();
    const idToLabel = new Map();
    let nextLabel = 0;
    for (const [skillId, vec] of embeddings) {
        // hnswlib-node@3 expects a plain Array<number> for addPoint, not a
        // typed array — passing Float32Array surfaces as
        // "Invalid the first argument type, must be an Array."
        index.addPoint(Array.from(vec), nextLabel);
        labelToId.set(nextLabel, skillId);
        idToLabel.set(skillId, nextLabel);
        nextLabel++;
    }
    return { index, labelToId, idToLabel, nextLabel };
}
/** Best-effort removal of the cache files; each file is attempted independently. */
function discardCacheArtefacts(paths) {
    for (const file of [paths.bin, paths.meta, paths.labels]) {
        try {
            if (existsSync(file))
                unlinkSync(file);
        }
        catch {
            /* best-effort cleanup */
        }
    }
}
|
|
236
|
+
/**
 * Wrap the raw HNSW index with the bookkeeping needed for incremental upserts
 * and a debounced persist. The caller owns the handle's lifecycle; nothing is
 * cleaned up automatically.
 *
 * @param args.index - The live index (must provide `writeIndexSync`).
 * @param args.labelToId - label → skillId map; serialized to the labels file.
 * @param args.idToLabel - skillId → label map; its size is stored as `count`.
 * @param args.nextLabel - First label to hand out for new points.
 * @param args.dim - Vector dimensionality recorded in the metadata.
 * @param args.modelName - Model identifier recorded in the metadata.
 * @param args.paths - Final and staging paths of the three cache files.
 * @returns A handle exposing the live state plus `schedulePersist` /
 *   `persistNow`.
 */
function createHandle(args) {
    let dirty = false;
    let timer = null;
    // `nextLabel` is the only scalar callers mutate; it lives in closure state
    // and is exposed through the getter/setter pair below.
    const state = {
        nextLabel: args.nextLabel,
    };
    const persistNow = () => {
        if (timer) {
            clearTimeout(timer);
            timer = null;
        }
        const artefactsPresent = existsSync(args.paths.bin) &&
            existsSync(args.paths.labels) &&
            existsSync(args.paths.meta);
        if (!dirty && artefactsPresent) {
            // Nothing to write and all three cache files are already on disk.
            return;
        }
        // The index file is written first, so make sure the cache directory
        // exists before the native writer touches it.
        mkdirSync(dirname(args.paths.binTmp), { recursive: true });
        // Sync write so the persist runs to completion inside the debounce
        // callback / on close.
        args.index.writeIndexSync(args.paths.binTmp);
        renameSync(args.paths.binTmp, args.paths.bin);
        const labelsArr = Array.from(args.labelToId.entries());
        writeAtomic(args.paths.labelsTmp, args.paths.labels, JSON.stringify(labelsArr));
        const meta = {
            version: 1,
            modelName: args.modelName,
            dim: args.dim,
            count: args.idToLabel.size,
            builtAt: new Date().toISOString(),
        };
        // Meta goes last: it is what the loader compares to decide on reuse.
        writeAtomic(args.paths.metaTmp, args.paths.meta, JSON.stringify(meta, null, 2));
        dirty = false;
    };
    const schedulePersist = () => {
        dirty = true;
        if (timer)
            clearTimeout(timer);
        timer = setTimeout(() => {
            timer = null;
            try {
                persistNow();
            }
            catch (err) {
                // Persist failures are non-fatal — the in-memory index stays valid.
                // Log the cache path so a user can spot a read-only cache dir.
                console.warn(`[hnsw-search] debounced persist failed (path=${args.paths.bin}):`, err instanceof Error ? err.message : err);
            }
        }, 5000);
        // Don't keep the event loop alive just for this timer.
        if (timer && typeof timer.unref === 'function')
            timer.unref();
    };
    return {
        get index() {
            return args.index;
        },
        get labelToId() {
            return args.labelToId;
        },
        get idToLabel() {
            return args.idToLabel;
        },
        get nextLabel() {
            return state.nextLabel;
        },
        set nextLabel(v) {
            state.nextLabel = v;
        },
        get paths() {
            return args.paths;
        },
        schedulePersist,
        persistNow,
    };
}
|
|
316
|
+
/**
 * Top-K nearest-neighbour search via the supplied handle.
 *
 * @param handle - HNSW handle returned by `loadOrBuildHnsw`
 * @param query - Query vector (must have the dimensionality the index was built with)
 * @param topK - Maximum neighbours to return. Fractions are rounded down;
 *   values below 1 and `NaN` produce an empty result instead of reaching the
 *   native search.
 * @returns Result rows in the order returned by the index; `score` is
 *   `1 - cosineDistance`. Labels without a skill id mapping are skipped.
 */
export function findSimilarHnsw(handle, query, topK) {
    const liveCount = handle.idToLabel.size;
    if (liveCount === 0)
        return [];
    // Never ask for more neighbours than there are live points.
    const k = Math.min(Math.floor(topK), liveCount);
    // Written as a negated comparison so that NaN is rejected as well.
    if (!(k >= 1))
        return [];
    // searchKnn requires a plain Array — Float32Array triggers
    // "Invalid the first argument type, must be an Array."
    const { neighbors, distances } = handle.index.searchKnn(Array.from(query), k);
    const out = [];
    for (let i = 0; i < neighbors.length; i++) {
        const skillId = handle.labelToId.get(neighbors[i]);
        // Explicit undefined check: an unmapped (deleted) label is skipped,
        // but a legitimately empty-string id is not.
        if (skillId === undefined)
            continue;
        out.push({ skillId, score: 1 - distances[i] });
    }
    return out;
}
|
|
341
|
+
/**
 * Add or replace a point. Used by `EmbeddingService.storeEmbedding` to keep
 * the in-memory graph aligned with the SQLite cache. Marks the handle dirty;
 * persist happens via the debounced 5s timer (or `persistNow`).
 *
 * A replacement inserts the new vector under a fresh label and then marks
 * the old point deleted, so every call consumes one slot of index capacity.
 * The index is grown when it is full, provided it exposes
 * `getCurrentCount` / `getMaxElements` / `resizeIndex`.
 *
 * @param handle - HNSW handle returned by `loadOrBuildHnsw`
 * @param skillId - Skill whose vector is added or replaced
 * @param vector - New embedding vector
 * @throws Propagates errors from the index; if the insert itself fails the
 *   handle's bookkeeping is left untouched.
 */
export function upsertPoint(handle, skillId, vector) {
    const { index } = handle;
    const previous = handle.idToLabel.get(skillId);
    const label = handle.nextLabel;
    // Deleted points keep occupying their slot, so repeated upserts would
    // eventually exhaust a fixed capacity; grow before that happens.
    if (typeof index.getCurrentCount === 'function' &&
        typeof index.getMaxElements === 'function' &&
        typeof index.resizeIndex === 'function') {
        const used = index.getCurrentCount();
        const limit = index.getMaxElements();
        if (used >= limit)
            index.resizeIndex(Math.max(limit * 2, used + 1));
    }
    // Insert first: if this throws, the old point is still live and mapped.
    // Plain Array required — a Float32Array is rejected by hnswlib-node.
    index.addPoint(Array.from(vector), label);
    handle.nextLabel = label + 1;
    handle.idToLabel.set(skillId, label);
    handle.labelToId.set(label, skillId);
    if (previous !== undefined) {
        // Unmap before marking so the old label is filtered from results even
        // if the native delete fails.
        handle.labelToId.delete(previous);
        index.markDelete(previous);
    }
    handle.schedulePersist();
}
|
|
363
|
+
/**
 * Remove a skill from the searchable set.
 *
 * The point is only marked deleted inside the index; dropping it from both
 * lookup maps is what makes `findSimilarHnsw` skip it. A debounced persist is
 * scheduled whenever something was removed.
 *
 * @param handle - HNSW handle returned by `loadOrBuildHnsw`
 * @param skillId - Skill to remove
 * @returns `true` when the skill was indexed and has been removed, `false`
 *   when it was unknown (nothing is touched in that case).
 */
export function removePoint(handle, skillId) {
    const assigned = handle.idToLabel.get(skillId);
    const known = assigned !== undefined;
    if (known) {
        handle.index.markDelete(assigned);
        handle.idToLabel.delete(skillId);
        handle.labelToId.delete(assigned);
        handle.schedulePersist();
    }
    return known;
}
|
|
377
|
+
/**
 * Test-only hook: forget the memoised result of the `hnswlib-node` import —
 * whether that was a constructor or the "unavailable" marker — so the next
 * load attempt runs the dynamic import again. Lets tests simulate the module
 * being (re)installed. Not part of the public API; do not call it from
 * production code.
 *
 * @internal
 */
export function __resetCachedHnswCtorForTests() {
    cachedCtor = null;
}
|
|
387
|
+
//# sourceMappingURL=hnsw-search.js.map
|