bluera-knowledge 0.19.6 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/dist/{chunk-JJYYK726.js → chunk-26MBEEKM.js} +2 -2
- package/dist/{chunk-VR5EDQTX.js → chunk-OZIVRLZE.js} +2 -2
- package/dist/{chunk-T7MENUKF.js → chunk-PZE2MO7H.js} +182 -29
- package/dist/chunk-PZE2MO7H.js.map +1 -0
- package/dist/index.js +4 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.d.ts +63 -7
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +3 -3
- package/dist/workers/background-worker-cli.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-T7MENUKF.js.map +0 -1
- /package/dist/{chunk-JJYYK726.js.map → chunk-26MBEEKM.js.map} +0 -0
- /package/dist/{chunk-VR5EDQTX.js.map → chunk-OZIVRLZE.js.map} +0 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,33 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
## [0.20.0](https://github.com/blueraai/bluera-knowledge/compare/v0.19.7...v0.20.0) (2026-02-01)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
### Features
|
|
9
|
+
|
|
10
|
+
* **embeddings:** add query/document prefix support for asymmetric models ([51a1a38](https://github.com/blueraai/bluera-knowledge/commit/51a1a380368a3a077b01e799ab65ff449eed8a46))
|
|
11
|
+
* **embeddings:** add state query and reset methods ([5275e28](https://github.com/blueraai/bluera-knowledge/commit/5275e283cdf4a72e21e44b08fc2075d4fee88aa7))
|
|
12
|
+
* **hooks:** add async execution and improve BK usage reminders ([5576a9f](https://github.com/blueraai/bluera-knowledge/commit/5576a9fdd0d0e9a05193c2b700e600e6756736e4))
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
### Bug Fixes
|
|
16
|
+
|
|
17
|
+
* **gitignore:** correct pattern ordering for bluera-base ([f9b38c8](https://github.com/blueraai/bluera-knowledge/commit/f9b38c80ee4b4d84330fef43eef2f6f452b78a61))
|
|
18
|
+
|
|
19
|
+
## [0.19.7](https://github.com/blueraai/bluera-knowledge/compare/v0.19.4...v0.19.7) (2026-01-31)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
### Features
|
|
23
|
+
|
|
24
|
+
* **suggest:** present selectable list instead of copy-paste commands ([b8f3de2](https://github.com/blueraai/bluera-knowledge/commit/b8f3de2ab02dfa2dfa0d219bb3785c1491ae3d1a))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
### Bug Fixes
|
|
28
|
+
|
|
29
|
+
* **embeddings:** improve reliability and performance ([d37c219](https://github.com/blueraai/bluera-knowledge/commit/d37c2190500f845c6bb7da78b432cf11b272b0f4))
|
|
30
|
+
* **gitignore:** add logs directory to ignored patterns ([ec9faf4](https://github.com/blueraai/bluera-knowledge/commit/ec9faf482e8fc8ba1cbf6619a8c561eb51e35f3c))
|
|
31
|
+
|
|
5
32
|
## [0.19.6](https://github.com/blueraai/bluera-knowledge/compare/v0.19.4...v0.19.6) (2026-01-31)
|
|
6
33
|
|
|
7
34
|
|
|
@@ -2,7 +2,7 @@ import {
|
|
|
2
2
|
createLogger,
|
|
3
3
|
summarizePayload,
|
|
4
4
|
truncateForLog
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-PZE2MO7H.js";
|
|
6
6
|
|
|
7
7
|
// src/crawl/intelligent-crawler.ts
|
|
8
8
|
import { EventEmitter } from "events";
|
|
@@ -916,4 +916,4 @@ export {
|
|
|
916
916
|
getCrawlStrategy,
|
|
917
917
|
IntelligentCrawler
|
|
918
918
|
};
|
|
919
|
-
//# sourceMappingURL=chunk-
|
|
919
|
+
//# sourceMappingURL=chunk-26MBEEKM.js.map
|
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
isRepoStoreDefinition,
|
|
10
10
|
isWebStoreDefinition,
|
|
11
11
|
summarizePayload
|
|
12
|
-
} from "./chunk-
|
|
12
|
+
} from "./chunk-PZE2MO7H.js";
|
|
13
13
|
|
|
14
14
|
// src/mcp/server.ts
|
|
15
15
|
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
@@ -2202,4 +2202,4 @@ export {
|
|
|
2202
2202
|
createMCPServer,
|
|
2203
2203
|
runMCPServer
|
|
2204
2204
|
};
|
|
2205
|
-
//# sourceMappingURL=chunk-
|
|
2205
|
+
//# sourceMappingURL=chunk-OZIVRLZE.js.map
|
|
@@ -2064,7 +2064,13 @@ var DEFAULT_CONFIG = {
|
|
|
2064
2064
|
dataDir: ".bluera/bluera-knowledge/data",
|
|
2065
2065
|
embedding: {
|
|
2066
2066
|
model: "Xenova/all-MiniLM-L6-v2",
|
|
2067
|
-
batchSize: 32
|
|
2067
|
+
batchSize: 32,
|
|
2068
|
+
dtype: "fp32",
|
|
2069
|
+
pooling: "mean",
|
|
2070
|
+
normalize: true,
|
|
2071
|
+
queryPrefix: "",
|
|
2072
|
+
docPrefix: "",
|
|
2073
|
+
maxInFlightBatches: 1
|
|
2068
2074
|
},
|
|
2069
2075
|
indexing: {
|
|
2070
2076
|
concurrency: 4,
|
|
@@ -3974,7 +3980,7 @@ var SearchService = class {
|
|
|
3974
3980
|
* Returns results with raw cosine similarity scores [0-1].
|
|
3975
3981
|
*/
|
|
3976
3982
|
async vectorSearchRaw(query, stores, limit) {
|
|
3977
|
-
const queryVector = await this.embeddingEngine.
|
|
3983
|
+
const queryVector = await this.embeddingEngine.embedQuery(query);
|
|
3978
3984
|
const results = [];
|
|
3979
3985
|
for (const storeId of stores) {
|
|
3980
3986
|
const hits = await this.lanceStore.search(storeId, queryVector, limit);
|
|
@@ -5418,23 +5424,71 @@ import { homedir as homedir2 } from "os";
|
|
|
5418
5424
|
import { join as join11 } from "path";
|
|
5419
5425
|
import { pipeline, env } from "@huggingface/transformers";
|
|
5420
5426
|
env.cacheDir = join11(homedir2(), ".cache", "huggingface-transformers");
|
|
5427
|
+
var DEFAULT_EMBEDDING_CONFIG = {
|
|
5428
|
+
model: "Xenova/all-MiniLM-L6-v2",
|
|
5429
|
+
batchSize: 32,
|
|
5430
|
+
dtype: "fp32",
|
|
5431
|
+
pooling: "mean",
|
|
5432
|
+
normalize: true,
|
|
5433
|
+
queryPrefix: "",
|
|
5434
|
+
docPrefix: "",
|
|
5435
|
+
maxInFlightBatches: 1
|
|
5436
|
+
};
|
|
5421
5437
|
var EmbeddingEngine = class {
|
|
5422
5438
|
extractor = null;
|
|
5423
|
-
|
|
5439
|
+
initPromise = null;
|
|
5440
|
+
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in embed() and embedBatch()
|
|
5424
5441
|
_dimensions = null;
|
|
5425
|
-
|
|
5426
|
-
|
|
5427
|
-
|
|
5428
|
-
|
|
5429
|
-
this.
|
|
5442
|
+
// eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in dispose()
|
|
5443
|
+
disposed = false;
|
|
5444
|
+
config;
|
|
5445
|
+
constructor(config = DEFAULT_EMBEDDING_CONFIG) {
|
|
5446
|
+
this.config = config;
|
|
5430
5447
|
}
|
|
5448
|
+
/**
|
|
5449
|
+
* Guard against use-after-dispose
|
|
5450
|
+
*/
|
|
5451
|
+
assertNotDisposed() {
|
|
5452
|
+
if (this.disposed) {
|
|
5453
|
+
throw new Error("EmbeddingEngine has been disposed");
|
|
5454
|
+
}
|
|
5455
|
+
}
|
|
5456
|
+
/**
|
|
5457
|
+
* Initialize the embedding pipeline (concurrency-safe).
|
|
5458
|
+
* Multiple concurrent calls will share the same initialization promise.
|
|
5459
|
+
*/
|
|
5431
5460
|
async initialize() {
|
|
5461
|
+
this.assertNotDisposed();
|
|
5432
5462
|
if (this.extractor !== null) return;
|
|
5433
|
-
this.
|
|
5434
|
-
|
|
5435
|
-
|
|
5463
|
+
this.initPromise ??= (async () => {
|
|
5464
|
+
try {
|
|
5465
|
+
this.extractor = await pipeline("feature-extraction", this.config.model, {
|
|
5466
|
+
dtype: this.config.dtype
|
|
5467
|
+
});
|
|
5468
|
+
} catch (error) {
|
|
5469
|
+
this.initPromise = null;
|
|
5470
|
+
throw error;
|
|
5471
|
+
}
|
|
5472
|
+
})();
|
|
5473
|
+
await this.initPromise;
|
|
5474
|
+
}
|
|
5475
|
+
/**
|
|
5476
|
+
* Embed a search query. Applies queryPrefix for asymmetric models.
|
|
5477
|
+
*/
|
|
5478
|
+
async embedQuery(text) {
|
|
5479
|
+
return this.embedText(this.config.queryPrefix + text);
|
|
5436
5480
|
}
|
|
5437
|
-
|
|
5481
|
+
/**
|
|
5482
|
+
* Embed a document for indexing. Applies docPrefix for asymmetric models.
|
|
5483
|
+
*/
|
|
5484
|
+
async embedDocument(text) {
|
|
5485
|
+
return this.embedText(this.config.docPrefix + text);
|
|
5486
|
+
}
|
|
5487
|
+
/**
|
|
5488
|
+
* Internal: embed text without prefix.
|
|
5489
|
+
*/
|
|
5490
|
+
async embedText(text) {
|
|
5491
|
+
this.assertNotDisposed();
|
|
5438
5492
|
if (this.extractor === null) {
|
|
5439
5493
|
await this.initialize();
|
|
5440
5494
|
}
|
|
@@ -5442,25 +5496,98 @@ var EmbeddingEngine = class {
|
|
|
5442
5496
|
throw new Error("Failed to initialize embedding model");
|
|
5443
5497
|
}
|
|
5444
5498
|
const output = await this.extractor(text, {
|
|
5445
|
-
pooling:
|
|
5446
|
-
normalize:
|
|
5499
|
+
pooling: this.config.pooling,
|
|
5500
|
+
normalize: this.config.normalize
|
|
5447
5501
|
});
|
|
5448
|
-
const
|
|
5449
|
-
this._dimensions ??=
|
|
5450
|
-
return
|
|
5502
|
+
const dim = output.dims[output.dims.length - 1] ?? 0;
|
|
5503
|
+
this._dimensions ??= dim;
|
|
5504
|
+
return Float32Array.from(output.data);
|
|
5451
5505
|
}
|
|
5506
|
+
/**
|
|
5507
|
+
* Embed a batch of documents with optional parallelism.
|
|
5508
|
+
* When maxInFlightBatches > 1, processes multiple batches concurrently.
|
|
5509
|
+
*/
|
|
5452
5510
|
async embedBatch(texts) {
|
|
5511
|
+
this.assertNotDisposed();
|
|
5512
|
+
if (this.extractor === null) {
|
|
5513
|
+
await this.initialize();
|
|
5514
|
+
}
|
|
5515
|
+
if (this.extractor === null) {
|
|
5516
|
+
throw new Error("Failed to initialize embedding model");
|
|
5517
|
+
}
|
|
5518
|
+
const batches = [];
|
|
5519
|
+
for (let i = 0; i < texts.length; i += this.config.batchSize) {
|
|
5520
|
+
batches.push(texts.slice(i, i + this.config.batchSize));
|
|
5521
|
+
}
|
|
5522
|
+
if (batches.length === 0) {
|
|
5523
|
+
return [];
|
|
5524
|
+
}
|
|
5525
|
+
if (this.config.maxInFlightBatches <= 1) {
|
|
5526
|
+
return this.embedBatchesSequential(batches);
|
|
5527
|
+
} else {
|
|
5528
|
+
return this.embedBatchesConcurrent(batches);
|
|
5529
|
+
}
|
|
5530
|
+
}
|
|
5531
|
+
/**
|
|
5532
|
+
* Process batches sequentially (original behavior).
|
|
5533
|
+
*/
|
|
5534
|
+
async embedBatchesSequential(batches) {
|
|
5453
5535
|
const results = [];
|
|
5454
|
-
for (let i = 0; i <
|
|
5455
|
-
const batch =
|
|
5456
|
-
|
|
5536
|
+
for (let i = 0; i < batches.length; i++) {
|
|
5537
|
+
const batch = batches[i];
|
|
5538
|
+
if (batch === void 0) continue;
|
|
5539
|
+
const batchResults = await this.processSingleBatch(batch);
|
|
5457
5540
|
results.push(...batchResults);
|
|
5458
|
-
if (i
|
|
5459
|
-
await new Promise((resolve4) =>
|
|
5541
|
+
if (i < batches.length - 1) {
|
|
5542
|
+
await new Promise((resolve4) => setImmediate(resolve4));
|
|
5460
5543
|
}
|
|
5461
5544
|
}
|
|
5462
5545
|
return results;
|
|
5463
5546
|
}
|
|
5547
|
+
/**
|
|
5548
|
+
* Process batches with controlled concurrency.
|
|
5549
|
+
*/
|
|
5550
|
+
async embedBatchesConcurrent(batches) {
|
|
5551
|
+
const results = new Array(batches.length);
|
|
5552
|
+
let inFlight = 0;
|
|
5553
|
+
const maxConcurrent = this.config.maxInFlightBatches;
|
|
5554
|
+
await Promise.all(
|
|
5555
|
+
batches.map(async (batch, idx) => {
|
|
5556
|
+
while (inFlight >= maxConcurrent) {
|
|
5557
|
+
await new Promise((resolve4) => setImmediate(resolve4));
|
|
5558
|
+
}
|
|
5559
|
+
inFlight++;
|
|
5560
|
+
try {
|
|
5561
|
+
results[idx] = await this.processSingleBatch(batch);
|
|
5562
|
+
} finally {
|
|
5563
|
+
inFlight--;
|
|
5564
|
+
}
|
|
5565
|
+
})
|
|
5566
|
+
);
|
|
5567
|
+
return results.flat();
|
|
5568
|
+
}
|
|
5569
|
+
/**
|
|
5570
|
+
* Process a single batch and return embeddings.
|
|
5571
|
+
*/
|
|
5572
|
+
async processSingleBatch(batch) {
|
|
5573
|
+
if (this.extractor === null) {
|
|
5574
|
+
throw new Error("Extractor not initialized");
|
|
5575
|
+
}
|
|
5576
|
+
const prefixedBatch = batch.map((text) => this.config.docPrefix + text);
|
|
5577
|
+
const output = await this.extractor(prefixedBatch, {
|
|
5578
|
+
pooling: this.config.pooling,
|
|
5579
|
+
normalize: this.config.normalize
|
|
5580
|
+
});
|
|
5581
|
+
const dim = output.dims[output.dims.length - 1] ?? 0;
|
|
5582
|
+
const batchResults = [];
|
|
5583
|
+
for (let b = 0; b < batch.length; b++) {
|
|
5584
|
+
const start = b * dim;
|
|
5585
|
+
const end = start + dim;
|
|
5586
|
+
batchResults.push(Float32Array.from(output.data.slice(start, end)));
|
|
5587
|
+
}
|
|
5588
|
+
this._dimensions ??= dim;
|
|
5589
|
+
return batchResults;
|
|
5590
|
+
}
|
|
5464
5591
|
/**
|
|
5465
5592
|
* Get cached embedding dimensions. Throws if embed() hasn't been called yet.
|
|
5466
5593
|
* Use ensureDimensions() if you need to guarantee dimensions are available.
|
|
@@ -5471,13 +5598,38 @@ var EmbeddingEngine = class {
|
|
|
5471
5598
|
}
|
|
5472
5599
|
return this._dimensions;
|
|
5473
5600
|
}
|
|
5601
|
+
/**
|
|
5602
|
+
* Check if the embedding pipeline is initialized.
|
|
5603
|
+
*/
|
|
5604
|
+
isInitialized() {
|
|
5605
|
+
return this.extractor !== null;
|
|
5606
|
+
}
|
|
5607
|
+
/**
|
|
5608
|
+
* Check if this engine has been disposed.
|
|
5609
|
+
*/
|
|
5610
|
+
isDisposed() {
|
|
5611
|
+
return this.disposed;
|
|
5612
|
+
}
|
|
5613
|
+
/**
|
|
5614
|
+
* Reset the engine to uninitialized state, allowing reuse after disposal.
|
|
5615
|
+
* If currently initialized, disposes the pipeline first.
|
|
5616
|
+
*/
|
|
5617
|
+
async reset() {
|
|
5618
|
+
if (this.extractor !== null) {
|
|
5619
|
+
await this.extractor.dispose();
|
|
5620
|
+
this.extractor = null;
|
|
5621
|
+
}
|
|
5622
|
+
this.initPromise = null;
|
|
5623
|
+
this._dimensions = null;
|
|
5624
|
+
this.disposed = false;
|
|
5625
|
+
}
|
|
5474
5626
|
/**
|
|
5475
5627
|
* Ensure dimensions are available, initializing the model if needed.
|
|
5476
5628
|
* Returns the embedding dimensions for the current model.
|
|
5477
5629
|
*/
|
|
5478
5630
|
async ensureDimensions() {
|
|
5479
5631
|
if (this._dimensions === null) {
|
|
5480
|
-
await this.
|
|
5632
|
+
await this.embedText("dimension probe");
|
|
5481
5633
|
}
|
|
5482
5634
|
if (this._dimensions === null) {
|
|
5483
5635
|
throw new Error("Failed to determine embedding dimensions");
|
|
@@ -5487,12 +5639,16 @@ var EmbeddingEngine = class {
|
|
|
5487
5639
|
/**
|
|
5488
5640
|
* Dispose the embedding pipeline to free resources.
|
|
5489
5641
|
* Should be called before process exit to prevent ONNX runtime cleanup issues on macOS.
|
|
5642
|
+
* After disposal, this engine cannot be used again.
|
|
5490
5643
|
*/
|
|
5491
5644
|
async dispose() {
|
|
5492
5645
|
if (this.extractor !== null) {
|
|
5493
5646
|
await this.extractor.dispose();
|
|
5494
5647
|
this.extractor = null;
|
|
5495
5648
|
}
|
|
5649
|
+
this.initPromise = null;
|
|
5650
|
+
this._dimensions = null;
|
|
5651
|
+
this.disposed = true;
|
|
5496
5652
|
}
|
|
5497
5653
|
};
|
|
5498
5654
|
|
|
@@ -5689,10 +5845,7 @@ var LazyServiceContainer = class {
|
|
|
5689
5845
|
get embeddings() {
|
|
5690
5846
|
if (this._embeddings === null) {
|
|
5691
5847
|
logger4.debug("Lazy-initializing EmbeddingEngine");
|
|
5692
|
-
this._embeddings = new EmbeddingEngine(
|
|
5693
|
-
this.appConfig.embedding.model,
|
|
5694
|
-
this.appConfig.embedding.batchSize
|
|
5695
|
-
);
|
|
5848
|
+
this._embeddings = new EmbeddingEngine(this.appConfig.embedding);
|
|
5696
5849
|
}
|
|
5697
5850
|
return this._embeddings;
|
|
5698
5851
|
}
|
|
@@ -5795,7 +5948,7 @@ async function createServices(configPath, dataDir, projectRoot) {
|
|
|
5795
5948
|
const pythonBridge = new PythonBridge();
|
|
5796
5949
|
await pythonBridge.start();
|
|
5797
5950
|
const lance = new LanceStore(resolvedDataDir);
|
|
5798
|
-
const embeddings = new EmbeddingEngine(appConfig.embedding
|
|
5951
|
+
const embeddings = new EmbeddingEngine(appConfig.embedding);
|
|
5799
5952
|
await embeddings.initialize();
|
|
5800
5953
|
const resolvedProjectRoot = config.resolveProjectRoot();
|
|
5801
5954
|
const definitionService = new StoreDefinitionService(resolvedProjectRoot);
|
|
@@ -5901,4 +6054,4 @@ export {
|
|
|
5901
6054
|
createServices,
|
|
5902
6055
|
destroyServices
|
|
5903
6056
|
};
|
|
5904
|
-
//# sourceMappingURL=chunk-
|
|
6057
|
+
//# sourceMappingURL=chunk-PZE2MO7H.js.map
|