bluera-knowledge 0.19.7 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import {
  } from "./chunk-CLIMKLTW.js";
  import {
  parseIgnorePatternsForScanning
- } from "./chunk-HXBIIMYL.js";
+ } from "./chunk-H25AEF47.js";
  import {
  __require
  } from "./chunk-DGUM43GV.js";
@@ -2063,8 +2063,14 @@ var DEFAULT_CONFIG = {
  version: 1,
  dataDir: ".bluera/bluera-knowledge/data",
  embedding: {
- model: "Xenova/all-MiniLM-L6-v2",
- batchSize: 32
+ model: "Xenova/bge-small-en-v1.5",
+ batchSize: 32,
+ dtype: "fp32",
+ pooling: "mean",
+ normalize: true,
+ queryPrefix: "Represent this sentence for searching relevant passages: ",
+ docPrefix: "",
+ maxInFlightBatches: 1
  },
  indexing: {
  concurrency: 4,
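
For reference, the hunk above replaces the two-field embedding default (model, batchSize) with the full configuration object consumed by the reworked EmbeddingEngine further down in this diff. A minimal sketch of constructing the engine with these defaults (field names taken from the hunk above; the constructor shown later accepts the whole object):

  // Sketch: passing the expanded embedding config to the new engine.
  const engine = new EmbeddingEngine({
    model: "Xenova/bge-small-en-v1.5",
    batchSize: 32,
    dtype: "fp32",
    pooling: "mean",
    normalize: true,
    queryPrefix: "Represent this sentence for searching relevant passages: ",
    docPrefix: "",
    maxInFlightBatches: 1
  });
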
@@ -3749,15 +3755,13 @@ function detectContentType(results) {
  }
  var SearchService = class {
  lanceStore;
- embeddingEngine;
  codeUnitService;
  codeGraphService;
  graphCache;
  searchConfig;
  unsubscribeCacheInvalidation;
- constructor(lanceStore, embeddingEngine, codeGraphService, searchConfig) {
+ constructor(lanceStore, codeGraphService, searchConfig) {
  this.lanceStore = lanceStore;
- this.embeddingEngine = embeddingEngine;
  this.codeUnitService = new CodeUnitService();
  this.codeGraphService = codeGraphService;
  this.graphCache = /* @__PURE__ */ new Map();
@@ -3972,17 +3976,17 @@ var SearchService = class {
  /**
  * Fetch raw vector search results without normalization.
  * Returns results with raw cosine similarity scores [0-1].
+ * Uses LanceDB's embedding function for query embedding,
+ * ensuring consistent query/document embedding through a single code path.
  */
  async vectorSearchRaw(query, stores, limit) {
- const queryVector = await this.embeddingEngine.embed(query);
  const results = [];
  for (const storeId of stores) {
- const hits = await this.lanceStore.search(storeId, queryVector, limit);
+ const hits = await this.lanceStore.searchText(storeId, query, limit);
  results.push(
  ...hits.map((r) => ({
  id: r.id,
  score: r.score,
- // Raw cosine similarity (1 - distance)
  content: r.content,
  metadata: r.metadata
  }))
@@ -4816,6 +4820,9 @@ function extractRepoName(url) {
  return name;
  }

+ // src/types/store.ts
+ var CURRENT_SCHEMA_VERSION = 2;
+
  // src/services/store.service.ts
  async function fileExists4(path4) {
  try {
@@ -4830,12 +4837,21 @@ var StoreService = class {
  definitionService;
  gitignoreService;
  projectRoot;
+ embeddingModelId;
  registry = { stores: [] };
  constructor(dataDir, options) {
  this.dataDir = dataDir;
- this.definitionService = options?.definitionService ?? void 0;
- this.gitignoreService = options?.gitignoreService ?? void 0;
- this.projectRoot = options?.projectRoot ?? void 0;
+ this.definitionService = options.definitionService ?? void 0;
+ this.gitignoreService = options.gitignoreService ?? void 0;
+ this.projectRoot = options.projectRoot ?? void 0;
+ this.embeddingModelId = options.embeddingModelId;
+ }
+ /**
+ * Get the current embedding model ID used for new stores.
+ * Used by model compatibility validation.
+ */
+ getCurrentModelId() {
+ return this.embeddingModelId;
  }
  async initialize() {
  await mkdir5(this.dataDir, { recursive: true });
@@ -4974,7 +4990,9 @@ var StoreService = class {
  tags: input.tags,
  status: "ready",
  createdAt: now,
- updatedAt: now
+ updatedAt: now,
+ schemaVersion: CURRENT_SCHEMA_VERSION,
+ modelId: this.embeddingModelId
  };
  break;
  }
@@ -5019,7 +5037,9 @@ var StoreService = class {
  tags: input.tags,
  status: "ready",
  createdAt: now,
- updatedAt: now
+ updatedAt: now,
+ schemaVersion: CURRENT_SCHEMA_VERSION,
+ modelId: this.embeddingModelId
  };
  break;
  }
@@ -5040,7 +5060,9 @@ var StoreService = class {
  tags: input.tags,
  status: "ready",
  createdAt: now,
- updatedAt: now
+ updatedAt: now,
+ schemaVersion: CURRENT_SCHEMA_VERSION,
+ modelId: this.embeddingModelId
  };
  break;
  default: {
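
Taken together with CURRENT_SCHEMA_VERSION above, each branch of the store-creation switch now stamps new registry entries with the schema version and the embedding model in use. A sketch of the added fields on a created record (values are illustrative; fields not shown in these hunks, such as id and name, are assumed unchanged):

  // Sketch: what a newly created store record carries in 0.21.0.
  const record = {
    // ...existing fields (name, tags, status, createdAt) unchanged...
    updatedAt: "2025-01-01T00:00:00.000Z",
    schemaVersion: 2,                     // CURRENT_SCHEMA_VERSION
    modelId: "Xenova/bge-small-en-v1.5"   // StoreService options.embeddingModelId
  };
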
@@ -5418,6 +5440,16 @@ import { homedir as homedir2 } from "os";
5418
5440
  import { join as join11 } from "path";
5419
5441
  import { pipeline, env } from "@huggingface/transformers";
5420
5442
  env.cacheDir = join11(homedir2(), ".cache", "huggingface-transformers");
5443
+ var DEFAULT_EMBEDDING_CONFIG = {
5444
+ model: "Xenova/bge-small-en-v1.5",
5445
+ batchSize: 32,
5446
+ dtype: "fp32",
5447
+ pooling: "mean",
5448
+ normalize: true,
5449
+ queryPrefix: "Represent this sentence for searching relevant passages: ",
5450
+ docPrefix: "",
5451
+ maxInFlightBatches: 1
5452
+ };
5421
5453
  var EmbeddingEngine = class {
5422
5454
  extractor = null;
5423
5455
  initPromise = null;
@@ -5425,11 +5457,9 @@ var EmbeddingEngine = class {
  _dimensions = null;
  // eslint-disable-next-line @typescript-eslint/prefer-readonly -- mutated in dispose()
  disposed = false;
- modelName;
- batchSize;
- constructor(modelName = "Xenova/all-MiniLM-L6-v2", batchSize = 32) {
- this.modelName = modelName;
- this.batchSize = batchSize;
+ config;
+ constructor(config = DEFAULT_EMBEDDING_CONFIG) {
+ this.config = config;
  }
  /**
  * Guard against use-after-dispose
@@ -5448,8 +5478,8 @@ var EmbeddingEngine = class {
  if (this.extractor !== null) return;
  this.initPromise ??= (async () => {
  try {
- this.extractor = await pipeline("feature-extraction", this.modelName, {
- dtype: "fp32"
+ this.extractor = await pipeline("feature-extraction", this.config.model, {
+ dtype: this.config.dtype
  });
  } catch (error) {
  this.initPromise = null;
@@ -5458,7 +5488,22 @@ var EmbeddingEngine = class {
  })();
  await this.initPromise;
  }
- async embed(text) {
+ /**
+ * Embed a search query. Applies queryPrefix for asymmetric models.
+ */
+ async embedQuery(text) {
+ return this.embedText(this.config.queryPrefix + text);
+ }
+ /**
+ * Embed a document for indexing. Applies docPrefix for asymmetric models.
+ */
+ async embedDocument(text) {
+ return this.embedText(this.config.docPrefix + text);
+ }
+ /**
+ * Internal: embed text without prefix.
+ */
+ async embedText(text) {
  this.assertNotDisposed();
  if (this.extractor === null) {
  await this.initialize();
@@ -5467,13 +5512,17 @@ var EmbeddingEngine = class {
  throw new Error("Failed to initialize embedding model");
  }
  const output = await this.extractor(text, {
- pooling: "mean",
- normalize: true
+ pooling: this.config.pooling,
+ normalize: this.config.normalize
  });
- const result = Array.from(output.data, (v) => Number(v));
- this._dimensions ??= result.length;
- return result;
+ const dim = output.dims[output.dims.length - 1] ?? 0;
+ this._dimensions ??= dim;
+ return Float32Array.from(output.data);
  }
+ /**
+ * Embed a batch of documents with optional parallelism.
+ * When maxInFlightBatches > 1, processes multiple batches concurrently.
+ */
  async embedBatch(texts) {
  this.assertNotDisposed();
  if (this.extractor === null) {
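
The two hunks above split the old embed() into embedQuery()/embedDocument() wrappers around a shared embedText(), which now returns a Float32Array; the split is what carries the BGE-style asymmetric prefixes. A short usage sketch (query and document strings are illustrative):

  // Sketch: asymmetric embedding with the new engine API.
  const engine = new EmbeddingEngine(DEFAULT_EMBEDDING_CONFIG);
  // Prepends queryPrefix ("Represent this sentence for searching relevant passages: ").
  const queryVec = await engine.embedQuery("where are ignore patterns parsed?");
  // Prepends docPrefix ("" by default), so documents are embedded unchanged.
  const docVec = await engine.embedDocument("parseIgnorePatternsForScanning handles .gitignore rules");
  // Both results are Float32Array of the same dimensionality.
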
@@ -5482,26 +5531,79 @@ var EmbeddingEngine = class {
  if (this.extractor === null) {
  throw new Error("Failed to initialize embedding model");
  }
+ const batches = [];
+ for (let i = 0; i < texts.length; i += this.config.batchSize) {
+ batches.push(texts.slice(i, i + this.config.batchSize));
+ }
+ if (batches.length === 0) {
+ return [];
+ }
+ if (this.config.maxInFlightBatches <= 1) {
+ return this.embedBatchesSequential(batches);
+ } else {
+ return this.embedBatchesConcurrent(batches);
+ }
+ }
+ /**
+ * Process batches sequentially (original behavior).
+ */
+ async embedBatchesSequential(batches) {
  const results = [];
- for (let i = 0; i < texts.length; i += this.batchSize) {
- const batch = texts.slice(i, i + this.batchSize);
- const output = await this.extractor(batch, {
- pooling: "mean",
- normalize: true
- });
- const dim = output.dims[output.dims.length - 1] ?? 0;
- for (let b = 0; b < batch.length; b++) {
- const start = b * dim;
- const end = start + dim;
- results.push(Array.from(output.data.slice(start, end), (v) => Number(v)));
- }
- this._dimensions ??= dim;
- if (i + this.batchSize < texts.length) {
+ for (let i = 0; i < batches.length; i++) {
+ const batch = batches[i];
+ if (batch === void 0) continue;
+ const batchResults = await this.processSingleBatch(batch);
+ results.push(...batchResults);
+ if (i < batches.length - 1) {
  await new Promise((resolve4) => setImmediate(resolve4));
  }
  }
  return results;
  }
+ /**
+ * Process batches with controlled concurrency.
+ */
+ async embedBatchesConcurrent(batches) {
+ const results = new Array(batches.length);
+ let inFlight = 0;
+ const maxConcurrent = this.config.maxInFlightBatches;
+ await Promise.all(
+ batches.map(async (batch, idx) => {
+ while (inFlight >= maxConcurrent) {
+ await new Promise((resolve4) => setImmediate(resolve4));
+ }
+ inFlight++;
+ try {
+ results[idx] = await this.processSingleBatch(batch);
+ } finally {
+ inFlight--;
+ }
+ })
+ );
+ return results.flat();
+ }
+ /**
+ * Process a single batch and return embeddings.
+ */
+ async processSingleBatch(batch) {
+ if (this.extractor === null) {
+ throw new Error("Extractor not initialized");
+ }
+ const prefixedBatch = batch.map((text) => this.config.docPrefix + text);
+ const output = await this.extractor(prefixedBatch, {
+ pooling: this.config.pooling,
+ normalize: this.config.normalize
+ });
+ const dim = output.dims[output.dims.length - 1] ?? 0;
+ const batchResults = [];
+ for (let b = 0; b < batch.length; b++) {
+ const start = b * dim;
+ const end = start + dim;
+ batchResults.push(Float32Array.from(output.data.slice(start, end)));
+ }
+ this._dimensions ??= dim;
+ return batchResults;
+ }
  /**
  * Get cached embedding dimensions. Throws if embed() hasn't been called yet.
  * Use ensureDimensions() if you need to guarantee dimensions are available.
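
As the hunk above shows, embedBatch() now chunks its input by batchSize and, when maxInFlightBatches is above 1, keeps that many chunks in flight through processSingleBatch(); the default of 1 preserves the previous sequential behaviour, and docPrefix is applied inside processSingleBatch(). A usage sketch (the value 2 and the document strings are illustrative):

  // Sketch: opting into concurrent batch embedding.
  const engine = new EmbeddingEngine({ ...DEFAULT_EMBEDDING_CONFIG, maxInFlightBatches: 2 });
  const docs = ["first document text", "second document text", "third document text"];
  const vectors = await engine.embedBatch(docs); // Float32Array[], returned in input order
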
@@ -5512,13 +5614,38 @@ var EmbeddingEngine = class {
  }
  return this._dimensions;
  }
+ /**
+ * Check if the embedding pipeline is initialized.
+ */
+ isInitialized() {
+ return this.extractor !== null;
+ }
+ /**
+ * Check if this engine has been disposed.
+ */
+ isDisposed() {
+ return this.disposed;
+ }
+ /**
+ * Reset the engine to uninitialized state, allowing reuse after disposal.
+ * If currently initialized, disposes the pipeline first.
+ */
+ async reset() {
+ if (this.extractor !== null) {
+ await this.extractor.dispose();
+ this.extractor = null;
+ }
+ this.initPromise = null;
+ this._dimensions = null;
+ this.disposed = false;
+ }
  /**
  * Ensure dimensions are available, initializing the model if needed.
  * Returns the embedding dimensions for the current model.
  */
  async ensureDimensions() {
  if (this._dimensions === null) {
- await this.embed("dimension probe");
+ await this.embedText("dimension probe");
  }
  if (this._dimensions === null) {
  throw new Error("Failed to determine embedding dimensions");
@@ -5543,6 +5670,88 @@ var EmbeddingEngine = class {

  // src/db/lance.ts
  import * as lancedb from "@lancedb/lancedb";
+ import { LanceSchema } from "@lancedb/lancedb/embedding";
+ import { Utf8 } from "apache-arrow";
+
+ // src/db/lance-embedding-function.ts
+ import { TextEmbeddingFunction, getRegistry } from "@lancedb/lancedb/embedding";
+ import { Float32 } from "apache-arrow";
+ var HuggingFaceEmbeddingFunction = class extends TextEmbeddingFunction {
+ engine;
+ embeddingConfig;
+ _ndims = null;
+ constructor(optionsRaw) {
+ super();
+ const options = this.resolveVariables(optionsRaw ?? {});
+ this.embeddingConfig = {
+ model: options.model ?? "Xenova/bge-small-en-v1.5",
+ batchSize: options.batchSize ?? 32,
+ dtype: options.dtype ?? "fp32",
+ pooling: options.pooling ?? "mean",
+ normalize: options.normalize ?? true,
+ queryPrefix: options.queryPrefix ?? "",
+ docPrefix: options.docPrefix ?? "",
+ maxInFlightBatches: 1
+ // Single-threaded for LanceDB integration
+ };
+ this.engine = new EmbeddingEngine(this.embeddingConfig);
+ }
+ /**
+ * Initialize the embedding model. Called by LanceDB before embeddings are computed.
+ */
+ async init() {
+ this._ndims = await this.engine.ensureDimensions();
+ }
+ /**
+ * Return embedding dimensions. Must call init() first.
+ */
+ ndims() {
+ if (this._ndims === null) {
+ throw new Error("HuggingFaceEmbeddingFunction not initialized. Call init() first.");
+ }
+ return this._ndims;
+ }
+ /**
+ * Return embedding data type (always Float32 for our models).
+ */
+ embeddingDataType() {
+ return new Float32();
+ }
+ /**
+ * Generate embeddings for a batch of texts (documents).
+ * Called during table.add() operations.
+ */
+ async generateEmbeddings(texts) {
+ return this.engine.embedBatch(texts);
+ }
+ /**
+ * Compute embedding for a single query.
+ * Called during table.search(query) operations.
+ */
+ async computeQueryEmbeddings(data) {
+ const embedding = await this.engine.embedQuery(data);
+ return Array.from(embedding);
+ }
+ /**
+ * Get the model ID for provenance tracking.
+ */
+ getModelId() {
+ return this.embeddingConfig.model;
+ }
+ /**
+ * Get the full embedding config.
+ */
+ getConfig() {
+ return this.embeddingConfig;
+ }
+ /**
+ * Dispose the underlying engine to free resources.
+ */
+ async dispose() {
+ await this.engine.dispose();
+ }
+ };
+ getRegistry().register("HuggingFaceEmbeddingFunction")(HuggingFaceEmbeddingFunction);

  // src/types/document.ts
  import { z as z5 } from "zod";
@@ -5560,15 +5769,51 @@ var DocumentMetadataSchema = z5.object({
  }).loose();

  // src/db/lance.ts
+ function isSearchHit(value) {
+ if (typeof value !== "object" || value === null) return false;
+ return "id" in value && "content" in value && "metadata" in value && "_distance" in value && typeof value.id === "string" && typeof value.content === "string" && typeof value.metadata === "string" && typeof value._distance === "number";
+ }
+ function parseDocumentMetadata(jsonStr) {
+ const parsed = DocumentMetadataSchema.parse(JSON.parse(jsonStr));
+ return {
+ ...parsed,
+ storeId: createStoreId(parsed.storeId)
+ };
+ }
  var LanceStore = class {
  connection = null;
  tables = /* @__PURE__ */ new Map();
  dataDir;
  // eslint-disable-next-line @typescript-eslint/prefer-readonly -- set via setDimensions()
  _dimensions = null;
+ embeddingFunction = null;
  constructor(dataDir) {
  this.dataDir = dataDir;
  }
+ /**
+ * Set the embedding function for auto-embedding queries.
+ * Must be called before initialize() for new tables.
+ * The embedding function is initialized and its dimensions are used for schema creation.
+ */
+ async setEmbeddingFunction(config) {
+ this.embeddingFunction = new HuggingFaceEmbeddingFunction({
+ model: config.model,
+ batchSize: config.batchSize,
+ dtype: config.dtype,
+ pooling: config.pooling,
+ normalize: config.normalize,
+ queryPrefix: config.queryPrefix,
+ docPrefix: config.docPrefix
+ });
+ await this.embeddingFunction.init();
+ this._dimensions = this.embeddingFunction.ndims();
+ }
+ /**
+ * Check if embedding function is available for auto-embedding queries.
+ */
+ hasEmbeddingFunction() {
+ return this.embeddingFunction !== null;
+ }
  /**
  * Set the embedding dimensions. Must be called before initialize().
  * This allows dimensions to be derived from the embedding model at runtime.
@@ -5579,22 +5824,35 @@ var LanceStore = class {
  }
  async initialize(storeId) {
  if (this._dimensions === null) {
- throw new Error("Dimensions not set. Call setDimensions() before initialize().");
+ throw new Error(
+ "Dimensions not set. Call setDimensions() or setEmbeddingFunction() before initialize()."
+ );
  }
  this.connection ??= await lancedb.connect(this.dataDir);
  const tableName = this.getTableName(storeId);
  const tableNames = await this.connection.tableNames();
  if (!tableNames.includes(tableName)) {
- const table = await this.connection.createTable(tableName, [
- {
- id: "__init__",
- content: "",
- vector: new Array(this._dimensions).fill(0),
- metadata: "{}"
- }
- ]);
- await table.delete('id = "__init__"');
- this.tables.set(tableName, table);
+ if (this.embeddingFunction !== null) {
+ const schema = LanceSchema({
+ id: new Utf8(),
+ content: this.embeddingFunction.sourceField(),
+ vector: this.embeddingFunction.vectorField(),
+ metadata: new Utf8()
+ });
+ const table = await this.connection.createEmptyTable(tableName, schema);
+ this.tables.set(tableName, table);
+ } else {
+ const table = await this.connection.createTable(tableName, [
+ {
+ id: "__init__",
+ content: "",
+ vector: new Array(this._dimensions).fill(0),
+ metadata: "{}"
+ }
+ ]);
+ await table.delete('id = "__init__"');
+ this.tables.set(tableName, table);
+ }
  } else {
  const table = await this.connection.openTable(tableName);
  this.tables.set(tableName, table);
@@ -5638,6 +5896,29 @@ var LanceStore = class {
  };
  });
  }
+ /**
+ * Search using a text query with automatic embedding.
+ * Requires setEmbeddingFunction() to have been called.
+ * Uses the embedding function to compute query embeddings consistently with document embeddings.
+ */
+ async searchText(storeId, query, limit) {
+ if (this.embeddingFunction === null) {
+ throw new Error(
+ "Embedding function not set. Call setEmbeddingFunction() before searchText()."
+ );
+ }
+ const queryEmbedding = await this.embeddingFunction.computeQueryEmbeddings(query);
+ const table = await this.getTable(storeId);
+ const searchQuery = table.vectorSearch(queryEmbedding).limit(limit).distanceType("cosine");
+ const rawResults = await searchQuery.toArray();
+ const results = rawResults.filter(isSearchHit);
+ return results.map((r) => ({
+ id: createDocumentId(r.id),
+ content: r.content,
+ score: 1 - r._distance,
+ metadata: parseDocumentMetadata(r.metadata)
+ }));
+ }
  async createFtsIndex(storeId) {
  const table = await this.getTable(storeId);
  await table.createIndex("content", {
@@ -5734,10 +6015,7 @@ var LazyServiceContainer = class {
  get embeddings() {
  if (this._embeddings === null) {
  logger4.debug("Lazy-initializing EmbeddingEngine");
- this._embeddings = new EmbeddingEngine(
- this.appConfig.embedding.model,
- this.appConfig.embedding.batchSize
- );
+ this._embeddings = new EmbeddingEngine(this.appConfig.embedding);
  }
  return this._embeddings;
  }
@@ -5757,12 +6035,7 @@ var LazyServiceContainer = class {
  get search() {
  if (this._search === null) {
  logger4.debug("Lazy-initializing SearchService");
- this._search = new SearchService(
- this.lance,
- this.embeddings,
- this.codeGraph,
- this.appConfig.search
- );
+ this._search = new SearchService(this.lance, this.codeGraph, this.appConfig.search);
  }
  return this._search;
  }
@@ -5821,7 +6094,8 @@ async function createLazyServices(configPath, dataDir, projectRoot) {
  const storeOptions = {
  definitionService,
  gitignoreService,
- projectRoot: resolvedProjectRoot
+ projectRoot: resolvedProjectRoot,
+ embeddingModelId: appConfig.embedding.model
  };
  const store = new StoreService(resolvedDataDir, storeOptions);
  await store.initialize();
@@ -5840,21 +6114,23 @@ async function createServices(configPath, dataDir, projectRoot) {
  const pythonBridge = new PythonBridge();
  await pythonBridge.start();
  const lance = new LanceStore(resolvedDataDir);
- const embeddings = new EmbeddingEngine(appConfig.embedding.model, appConfig.embedding.batchSize);
+ const embeddings = new EmbeddingEngine(appConfig.embedding);
  await embeddings.initialize();
+ await lance.setEmbeddingFunction(appConfig.embedding);
  const resolvedProjectRoot = config.resolveProjectRoot();
  const definitionService = new StoreDefinitionService(resolvedProjectRoot);
  const gitignoreService = new GitignoreService(resolvedProjectRoot);
  const storeOptions = {
  definitionService,
  gitignoreService,
- projectRoot: resolvedProjectRoot
+ projectRoot: resolvedProjectRoot,
+ embeddingModelId: appConfig.embedding.model
  };
  const store = new StoreService(resolvedDataDir, storeOptions);
  await store.initialize();
  const codeGraph = new CodeGraphService(resolvedDataDir, pythonBridge);
  const manifest = new ManifestService(resolvedDataDir);
- const search = new SearchService(lance, embeddings, codeGraph, appConfig.search);
+ const search = new SearchService(lance, codeGraph, appConfig.search);
  const index = new IndexService(lance, embeddings, {
  codeGraphService: codeGraph,
  manifestService: manifest,
@@ -5946,4 +6222,4 @@ export {
  createServices,
  destroyServices
  };
- //# sourceMappingURL=chunk-JPJI3VMA.js.map
+ //# sourceMappingURL=chunk-ZR23KJPJ.js.map