npm - @o-lang/semantic-doc-search - Versions diffs - 1.0.13 → 1.0.15 - Mend

@o-lang/semantic-doc-search 1.0.13 → 1.0.15

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Files changed (3)

package/package.json +1 -1
package/src/adapters/pgvectorAdapter.js +4 -13
package/src/embeddings/local.js +25 -55

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@o-lang/semantic-doc-search",
-  "version": "1.0.13",
+  "version": "1.0.15",
   "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
   "main": "src/index.js",
   "type": "commonjs",

package/src/adapters/pgvectorAdapter.js CHANGED Viewed

@@ -13,35 +13,26 @@ class PgVectorAdapter {
     });
   }
-  // Convert JavaScript array to PostgreSQL array format
-  // [1.0, 2.0, 3.0] -> {1.0,2.0,3.0}
-  arrayToPgArray(arr) {
-    return `{${arr.join(',')}}`;
-  }
   async upsert({ id, vector, content, source, metadata = {} }) {
-    // Convert to PostgreSQL array format (NOT JSON)
-    const pgVector = this.arrayToPgArray(vector);
+    // Pass vector as array parameter - let pg handle conversion
     await this.pool.query(
       `INSERT INTO doc_embeddings (id, embedding, content, source, metadata)
        VALUES ($1, $2::vector, $3, $4, $5::jsonb)
        ON CONFLICT (id) DO UPDATE
        SET embedding = $2::vector, content = $3, source = $4, metadata = $5::jsonb, updated_at = NOW()`,
-      [id, pgVector, content, source, JSON.stringify(metadata)]
+      [id, vector, content, source, JSON.stringify(metadata)]
     );
   }
   async query(vector, topK = 5) {
-    const pgVector = this.arrayToPgArray(vector);
+    // Pass vector as array parameter - let pg handle conversion
     const res = await this.pool.query(
       `SELECT id, content, source, metadata,
               1 - (embedding <=> $1::vector) AS score
        FROM doc_embeddings
        ORDER BY embedding <=> $1::vector
        LIMIT $2`,
-      [pgVector, topK]
+      [vector, topK]
     );
     return res.rows.map(row => ({

package/src/embeddings/local.js CHANGED Viewed

@@ -1,44 +1,47 @@
 // src/embeddings/local.js
-const { pipeline, env } = require('@xenova/transformers');
-// Configure transformers to work in Node.js
-env.allowLocalModels = true;
-env.backends.onnx.warmup = false; // Faster startup
 /**
  * LocalEmbedding - REAL semantic embeddings using all-MiniLM-L6-v2
- * This is a compact, high-quality sentence transformer that:
- * - Understands semantic meaning of text
- * - Produces embeddings with negative and positive values (-1 to 1)
- * - Works offline after first download
- * - Is optimized for CPU (no GPU required)
- * - Produces 384-dimensional vectors compatible with pgvector
+ * Uses dynamic import to work with ESM packages in CommonJS environment
  */
 class LocalEmbedding {
   constructor() {
-    this.dim = 384; // all-MiniLM-L6-v2 output dimension
+    this.dim = 384;
     this.modelPromise = null;
-    this.isModelLoading = false;
+    this.transformersPromise = null;
+  }
+  /**
+   * Lazy-load the @xenova/transformers package
+   */
+  async getTransformers() {
+    if (!this.transformersPromise) {
+      this.transformersPromise = import('@xenova/transformers');
+    }
+    return this.transformersPromise;
   }
   /**
-   * Lazy-load the embedding model (only loads when first needed)
+   * Lazy-load the embedding model
    */
   async getModel() {
     if (!this.modelPromise) {
-      this.isModelLoading = true;
+      const { pipeline, env } = await this.getTransformers();
+      // Configure transformers
+      env.allowLocalModels = true;
+      env.backends.onnx.warmup = false;
       console.log('🔄 Loading local embedding model (first run may take 1-2 minutes)...');
       this.modelPromise = pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', {
         revision: 'main',
-        cache_dir: './.cache/embeddings' // Cache model locally
+        cache_dir: './.cache/embeddings'
       }).then(model => {
         console.log('✅ Local embedding model loaded successfully!');
-        this.isModelLoading = false;
         return model;
       }).catch(error => {
         console.error('❌ Failed to load local embedding model:', error.message);
-        this.isModelLoading = false;
         throw error;
       });
     }
@@ -47,58 +50,32 @@ class LocalEmbedding {
   /**
    * Generate REAL semantic embedding for text
-   * @param {string} text - Input text to embed
-   * @returns {number[]} - 384-dimensional embedding vector with values typically between -1 and 1
    */
   async embed(text) {
     if (!text || !text.trim()) {
-      // Return zero vector for empty text
       return new Array(this.dim).fill(0);
     }
     try {
       const model = await this.getModel();
-      // Generate embedding with mean pooling and normalization
-      // This matches the standard sentence-transformers approach
       const output = await model(text, {
         pooling: 'mean',
         normalize: true
       });
-      // Convert Float32Array to regular array
-      const embedding = Array.from(output.data);
-      // Verify dimension
-      if (embedding.length !== this.dim) {
-        console.warn(`⚠️ Expected ${this.dim} dimensions, got ${embedding.length}`);
-        // Pad or truncate to correct dimension
-        if (embedding.length < this.dim) {
-          return [...embedding, ...new Array(this.dim - embedding.length).fill(0)];
-        } else {
-          return embedding.slice(0, this.dim);
-        }
-      }
-      return embedding;
+      return Array.from(output.data);
     } catch (error) {
-      console.error(`❌ Embedding generation failed for text: "${text.substring(0, 50)}..."`);
-      console.error('Error:', error.message);
-      // Fallback to zero vector to prevent complete failure
+      console.error(`❌ Embedding failed for: "${text.substring(0, 50)}..."`);
       return new Array(this.dim).fill(0);
     }
   }
   /**
-   * Batch embedding for multiple strings (processed sequentially to manage memory)
+   * Batch embedding for multiple strings
    */
   async embedBatch(textArray = []) {
     if (!Array.isArray(textArray)) {
       throw new Error("embedBatch expects an array of strings");
     }
     const embeddings = [];
     for (const text of textArray) {
       const embedding = await this.embed(text);
@@ -116,8 +93,7 @@ class LocalEmbedding {
 }
 /**
- * Convenience function for compatibility with existing code
- * Creates embedding with retry logic
+ * Convenience function for compatibility
  */
 async function createEmbeddingWithRetry(text, options = {}, retries = 2) {
   const embedder = new LocalEmbedding();
@@ -125,8 +101,6 @@ async function createEmbeddingWithRetry(text, options = {}, retries = 2) {
   for (let attempt = 1; attempt <= retries; attempt++) {
     try {
       const embedding = await embedder.embed(text);
-      // Verify embedding is valid (not all zeros)
       const isAllZeros = embedding.every(val => val === 0);
       if (isAllZeros && (text || '').trim()) {
         if (attempt === retries) {
@@ -134,17 +108,13 @@ async function createEmbeddingWithRetry(text, options = {}, retries = 2) {
         }
         throw new Error('Embedding returned all zeros');
       }
       return embedding;
     } catch (err) {
       if (attempt === retries) {
         console.error(`❌ All ${retries} attempts failed for embedding text: "${text.substring(0, 50)}..."`);
         throw err;
       }
       console.warn(`⚠️ Embedding attempt ${attempt} failed, retrying...`);
-      // Add small delay before retry
       await new Promise(resolve => setTimeout(resolve, 100 * attempt));
     }
   }