bluera-knowledge 0.19.7 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/dist/{chunk-TWX7MN5L.js → chunk-26MBEEKM.js} +2 -2
- package/dist/{chunk-6BC5OG4M.js → chunk-OZIVRLZE.js} +2 -2
- package/dist/{chunk-JPJI3VMA.js → chunk-PZE2MO7H.js} +144 -36
- package/dist/chunk-PZE2MO7H.js.map +1 -0
- package/dist/index.js +4 -4
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.d.ts +52 -7
- package/dist/mcp/server.js +2 -2
- package/dist/workers/background-worker-cli.js +3 -3
- package/dist/workers/background-worker-cli.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-JPJI3VMA.js.map +0 -1
- /package/dist/{chunk-TWX7MN5L.js.map → chunk-26MBEEKM.js.map} +0 -0
- /package/dist/{chunk-6BC5OG4M.js.map → chunk-OZIVRLZE.js.map} +0 -0
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,20 @@
 
 All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines.
 
+## [0.20.0](https://github.com/blueraai/bluera-knowledge/compare/v0.19.7...v0.20.0) (2026-02-01)
+
+
+### Features
+
+* **embeddings:** add query/document prefix support for asymmetric models ([51a1a38](https://github.com/blueraai/bluera-knowledge/commit/51a1a380368a3a077b01e799ab65ff449eed8a46))
+* **embeddings:** add state query and reset methods ([5275e28](https://github.com/blueraai/bluera-knowledge/commit/5275e283cdf4a72e21e44b08fc2075d4fee88aa7))
+* **hooks:** add async execution and improve BK usage reminders ([5576a9f](https://github.com/blueraai/bluera-knowledge/commit/5576a9fdd0d0e9a05193c2b700e600e6756736e4))
+
+
+### Bug Fixes
+
+* **gitignore:** correct pattern ordering for bluera-base ([f9b38c8](https://github.com/blueraai/bluera-knowledge/commit/f9b38c80ee4b4d84330fef43eef2f6f452b78a61))
+
 ## [0.19.7](https://github.com/blueraai/bluera-knowledge/compare/v0.19.4...v0.19.7) (2026-01-31)
 
 
package/dist/{chunk-TWX7MN5L.js → chunk-26MBEEKM.js}
CHANGED
@@ -2,7 +2,7 @@ import {
   createLogger,
   summarizePayload,
   truncateForLog
-} from "./chunk-JPJI3VMA.js";
+} from "./chunk-PZE2MO7H.js";
 
 // src/crawl/intelligent-crawler.ts
 import { EventEmitter } from "events";
@@ -916,4 +916,4 @@ export {
   getCrawlStrategy,
   IntelligentCrawler
 };
-//# sourceMappingURL=chunk-TWX7MN5L.js.map
+//# sourceMappingURL=chunk-26MBEEKM.js.map
package/dist/{chunk-6BC5OG4M.js → chunk-OZIVRLZE.js}
CHANGED
@@ -9,7 +9,7 @@ import {
   isRepoStoreDefinition,
   isWebStoreDefinition,
   summarizePayload
-} from "./chunk-JPJI3VMA.js";
+} from "./chunk-PZE2MO7H.js";
 
 // src/mcp/server.ts
 import { Server } from "@modelcontextprotocol/sdk/server/index.js";
@@ -2202,4 +2202,4 @@ export {
   createMCPServer,
   runMCPServer
 };
-//# sourceMappingURL=chunk-6BC5OG4M.js.map
+//# sourceMappingURL=chunk-OZIVRLZE.js.map
package/dist/{chunk-JPJI3VMA.js → chunk-PZE2MO7H.js}
CHANGED
@@ -2064,7 +2064,13 @@ var DEFAULT_CONFIG = {
   dataDir: ".bluera/bluera-knowledge/data",
   embedding: {
     model: "Xenova/all-MiniLM-L6-v2",
-    batchSize: 32
+    batchSize: 32,
+    dtype: "fp32",
+    pooling: "mean",
+    normalize: true,
+    queryPrefix: "",
+    docPrefix: "",
+    maxInFlightBatches: 1
   },
   indexing: {
     concurrency: 4,
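For orientation, here is a minimal sketch of how the expanded embedding defaults above might be overridden for an asymmetric model. The model name and prefix strings are illustrative assumptions (E5-style conventions), not values shipped by this package; the shipped defaults keep both prefixes empty.

```js
// Hypothetical override of the 0.20.0 embedding defaults for an asymmetric model.
// The model name and prefix values are assumptions for illustration only.
const embeddingConfig = {
  model: "Xenova/multilingual-e5-small",
  batchSize: 32,
  dtype: "fp32",
  pooling: "mean",
  normalize: true,
  queryPrefix: "query: ",   // prepended to search queries by embedQuery()
  docPrefix: "passage: ",   // prepended to indexed documents by embedDocument()/embedBatch()
  maxInFlightBatches: 2     // allow two embedding batches in flight during indexing
};
```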
@@ -3974,7 +3980,7 @@ var SearchService = class {
    * Returns results with raw cosine similarity scores [0-1].
    */
   async vectorSearchRaw(query, stores, limit) {
-    const queryVector = await this.embeddingEngine.
+    const queryVector = await this.embeddingEngine.embedQuery(query);
     const results = [];
     for (const storeId of stores) {
       const hits = await this.lanceStore.search(storeId, queryVector, limit);
@@ -5418,6 +5424,16 @@ import { homedir as homedir2 } from "os";
 import { join as join11 } from "path";
 import { pipeline, env } from "@huggingface/transformers";
 env.cacheDir = join11(homedir2(), ".cache", "huggingface-transformers");
+var DEFAULT_EMBEDDING_CONFIG = {
+  model: "Xenova/all-MiniLM-L6-v2",
+  batchSize: 32,
+  dtype: "fp32",
+  pooling: "mean",
+  normalize: true,
+  queryPrefix: "",
+  docPrefix: "",
+  maxInFlightBatches: 1
+};
 var EmbeddingEngine = class {
   extractor = null;
   initPromise = null;
@@ -5425,11 +5441,9 @@ var EmbeddingEngine = class {
   _dimensions = null;
   // eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in dispose()
   disposed = false;
-
-
-
-    this.modelName = modelName;
-    this.batchSize = batchSize;
+  config;
+  constructor(config = DEFAULT_EMBEDDING_CONFIG) {
+    this.config = config;
   }
   /**
    * Guard against use-after-dispose
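A migration sketch for the constructor change above. The "before" form is inferred from the removed modelName/batchSize field assignments rather than quoted from 0.19.x, and it assumes the engine is constructed directly rather than through createServices().

```js
// Before (0.19.x, inferred): separate positional arguments.
// const engine = new EmbeddingEngine("Xenova/all-MiniLM-L6-v2", 32);

// After (0.20.0): a single config object; omitting it falls back to DEFAULT_EMBEDDING_CONFIG.
const engine = new EmbeddingEngine({
  model: "Xenova/all-MiniLM-L6-v2",
  batchSize: 32,
  dtype: "fp32",
  pooling: "mean",
  normalize: true,
  queryPrefix: "",
  docPrefix: "",
  maxInFlightBatches: 1
});
```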
@@ -5448,8 +5462,8 @@ var EmbeddingEngine = class {
     if (this.extractor !== null) return;
     this.initPromise ??= (async () => {
       try {
-        this.extractor = await pipeline("feature-extraction", this.
-          dtype:
+        this.extractor = await pipeline("feature-extraction", this.config.model, {
+          dtype: this.config.dtype
         });
       } catch (error) {
         this.initPromise = null;
@@ -5458,7 +5472,22 @@ var EmbeddingEngine = class {
     })();
     await this.initPromise;
   }
-
+  /**
+   * Embed a search query. Applies queryPrefix for asymmetric models.
+   */
+  async embedQuery(text) {
+    return this.embedText(this.config.queryPrefix + text);
+  }
+  /**
+   * Embed a document for indexing. Applies docPrefix for asymmetric models.
+   */
+  async embedDocument(text) {
+    return this.embedText(this.config.docPrefix + text);
+  }
+  /**
+   * Internal: embed text without prefix.
+   */
+  async embedText(text) {
     this.assertNotDisposed();
     if (this.extractor === null) {
       await this.initialize();
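A short usage sketch of the query/document split introduced above. The engine argument is assumed to be an EmbeddingEngine whose config sets non-empty prefixes (as in the earlier sketch); with the shipped defaults both prefixes are empty, so embedQuery() and embedDocument() behave identically.

```js
// Sketch: how the prefixes are applied at call time.
async function embedBoth(engine) {
  // embedQuery() prepends config.queryPrefix before delegating to embedText().
  const queryVec = await engine.embedQuery("how are embeddings reset?");
  // embedDocument() prepends config.docPrefix instead, so the same text can embed
  // differently under an asymmetric model.
  const docVec = await engine.embedDocument("reset() disposes the pipeline and clears cached state.");
  return { queryVec, docVec }; // both are Float32Array vectors
}
```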
@@ -5467,13 +5496,17 @@ var EmbeddingEngine = class {
       throw new Error("Failed to initialize embedding model");
     }
     const output = await this.extractor(text, {
-      pooling:
-      normalize:
+      pooling: this.config.pooling,
+      normalize: this.config.normalize
     });
-    const
-    this._dimensions ??=
-    return
+    const dim = output.dims[output.dims.length - 1] ?? 0;
+    this._dimensions ??= dim;
+    return Float32Array.from(output.data);
   }
+  /**
+   * Embed a batch of documents with optional parallelism.
+   * When maxInFlightBatches > 1, processes multiple batches concurrently.
+   */
   async embedBatch(texts) {
     this.assertNotDisposed();
     if (this.extractor === null) {
@@ -5482,26 +5515,79 @@ var EmbeddingEngine = class {
     if (this.extractor === null) {
       throw new Error("Failed to initialize embedding model");
     }
+    const batches = [];
+    for (let i = 0; i < texts.length; i += this.config.batchSize) {
+      batches.push(texts.slice(i, i + this.config.batchSize));
+    }
+    if (batches.length === 0) {
+      return [];
+    }
+    if (this.config.maxInFlightBatches <= 1) {
+      return this.embedBatchesSequential(batches);
+    } else {
+      return this.embedBatchesConcurrent(batches);
+    }
+  }
+  /**
+   * Process batches sequentially (original behavior).
+   */
+  async embedBatchesSequential(batches) {
     const results = [];
-    for (let i = 0; i <
-    const batch =
-
-
-
-
-    const dim = output.dims[output.dims.length - 1] ?? 0;
-    for (let b = 0; b < batch.length; b++) {
-      const start = b * dim;
-      const end = start + dim;
-      results.push(Array.from(output.data.slice(start, end), (v) => Number(v)));
-    }
-    this._dimensions ??= dim;
-    if (i + this.batchSize < texts.length) {
+    for (let i = 0; i < batches.length; i++) {
+      const batch = batches[i];
+      if (batch === void 0) continue;
+      const batchResults = await this.processSingleBatch(batch);
+      results.push(...batchResults);
+      if (i < batches.length - 1) {
         await new Promise((resolve4) => setImmediate(resolve4));
       }
     }
     return results;
   }
+  /**
+   * Process batches with controlled concurrency.
+   */
+  async embedBatchesConcurrent(batches) {
+    const results = new Array(batches.length);
+    let inFlight = 0;
+    const maxConcurrent = this.config.maxInFlightBatches;
+    await Promise.all(
+      batches.map(async (batch, idx) => {
+        while (inFlight >= maxConcurrent) {
+          await new Promise((resolve4) => setImmediate(resolve4));
+        }
+        inFlight++;
+        try {
+          results[idx] = await this.processSingleBatch(batch);
+        } finally {
+          inFlight--;
+        }
+      })
+    );
+    return results.flat();
+  }
+  /**
+   * Process a single batch and return embeddings.
+   */
+  async processSingleBatch(batch) {
+    if (this.extractor === null) {
+      throw new Error("Extractor not initialized");
+    }
+    const prefixedBatch = batch.map((text) => this.config.docPrefix + text);
+    const output = await this.extractor(prefixedBatch, {
+      pooling: this.config.pooling,
+      normalize: this.config.normalize
+    });
+    const dim = output.dims[output.dims.length - 1] ?? 0;
+    const batchResults = [];
+    for (let b = 0; b < batch.length; b++) {
+      const start = b * dim;
+      const end = start + dim;
+      batchResults.push(Float32Array.from(output.data.slice(start, end)));
+    }
+    this._dimensions ??= dim;
+    return batchResults;
+  }
   /**
    * Get cached embedding dimensions. Throws if embed() hasn't been called yet.
    * Use ensureDimensions() if you need to guarantee dimensions are available.
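The concurrent path above throttles work with a simple in-flight counter polled via setImmediate rather than a queue. Below is a standalone sketch of that pattern, detached from the embedding engine; the mapWithLimit name and the doubling "work" are illustrative, not part of the package.

```js
// Minimal sketch of the in-flight throttle used by embedBatchesConcurrent():
// start every task, but make each one wait until a slot frees up.
async function mapWithLimit(items, limit, worker) {
  const results = new Array(items.length);
  let inFlight = 0;
  await Promise.all(
    items.map(async (item, idx) => {
      // Yield to the event loop until a slot is free.
      while (inFlight >= limit) {
        await new Promise((resolve) => setImmediate(resolve));
      }
      inFlight++;
      try {
        results[idx] = await worker(item);
      } finally {
        inFlight--;
      }
    })
  );
  return results;
}

// Example: at most 2 "batches" processed at a time.
mapWithLimit([1, 2, 3, 4], 2, async (n) => n * 2).then(console.log); // [2, 4, 6, 8]
```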
@@ -5512,13 +5598,38 @@ var EmbeddingEngine = class {
     }
     return this._dimensions;
   }
+  /**
+   * Check if the embedding pipeline is initialized.
+   */
+  isInitialized() {
+    return this.extractor !== null;
+  }
+  /**
+   * Check if this engine has been disposed.
+   */
+  isDisposed() {
+    return this.disposed;
+  }
+  /**
+   * Reset the engine to uninitialized state, allowing reuse after disposal.
+   * If currently initialized, disposes the pipeline first.
+   */
+  async reset() {
+    if (this.extractor !== null) {
+      await this.extractor.dispose();
+      this.extractor = null;
+    }
+    this.initPromise = null;
+    this._dimensions = null;
+    this.disposed = false;
+  }
   /**
    * Ensure dimensions are available, initializing the model if needed.
    * Returns the embedding dimensions for the current model.
    */
   async ensureDimensions() {
     if (this._dimensions === null) {
-      await this.
+      await this.embedText("dimension probe");
     }
     if (this._dimensions === null) {
       throw new Error("Failed to determine embedding dimensions");
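A short lifecycle sketch of the new state-query and reset methods, assuming direct use of the engine. Per the code above, reset() disposes the underlying pipeline if one is loaded and clears the disposed flag, so the engine can be reused and will re-initialize lazily on the next embed call.

```js
// Hypothetical lifecycle walk-through using the 0.20.0 state methods.
async function recycle(engine) {
  if (!engine.isInitialized()) {
    await engine.initialize(); // loads the feature-extraction pipeline lazily
  }
  const before = await engine.embedQuery("probe");

  await engine.reset();                // disposes the pipeline, clears cached dimensions
  console.log(engine.isInitialized()); // false
  console.log(engine.isDisposed());    // false — reset() leaves the engine reusable

  const after = await engine.embedQuery("probe"); // re-initializes on demand
  return before.length === after.length;
}
```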
@@ -5734,10 +5845,7 @@ var LazyServiceContainer = class {
   get embeddings() {
     if (this._embeddings === null) {
       logger4.debug("Lazy-initializing EmbeddingEngine");
-      this._embeddings = new EmbeddingEngine(
-        this.appConfig.embedding.model,
-        this.appConfig.embedding.batchSize
-      );
+      this._embeddings = new EmbeddingEngine(this.appConfig.embedding);
     }
     return this._embeddings;
   }
@@ -5840,7 +5948,7 @@ async function createServices(configPath, dataDir, projectRoot) {
   const pythonBridge = new PythonBridge();
   await pythonBridge.start();
   const lance = new LanceStore(resolvedDataDir);
-  const embeddings = new EmbeddingEngine(appConfig.embedding
+  const embeddings = new EmbeddingEngine(appConfig.embedding);
   await embeddings.initialize();
   const resolvedProjectRoot = config.resolveProjectRoot();
   const definitionService = new StoreDefinitionService(resolvedProjectRoot);
@@ -5946,4 +6054,4 @@ export {
   createServices,
   destroyServices
 };
-//# sourceMappingURL=chunk-JPJI3VMA.js.map
+//# sourceMappingURL=chunk-PZE2MO7H.js.map