npm - @o-lang/semantic-doc-search - Versions diffs - 1.0.41 → 1.0.43 - Mend

@o-lang/semantic-doc-search 1.0.41 → 1.0.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/package.json +40 -38
package/src/embeddings/local.js +12 -2
package/src/embeddings/local.js.bak +153 -0
package/src/index.js +1 -1
package/.cache/embeddings/Xenova/all-MiniLM-L6-v2/config.json +0 -25
package/.cache/embeddings/Xenova/all-MiniLM-L6-v2/onnx/model_quantized.onnx +0 -0
package/.cache/embeddings/Xenova/all-MiniLM-L6-v2/tokenizer.json +0 -30686
package/.cache/embeddings/Xenova/all-MiniLM-L6-v2/tokenizer_config.json +0 -15
package/.env.example +0 -0
package/bin/cli.js +0 -58
package/docs/sample1.txt +0 -1
package/docs/vacation policy +0 -5
package/embeddings.json +0 -3
package/test-doc-search-batch.js +0 -36
package/test-doc-search.js +0 -40
package/test-embed.js +0 -10
package/test-single-doc.js +0 -32

package/.cache/embeddings/Xenova/all-MiniLM-L6-v2/tokenizer_config.json DELETED Viewed

@@ -1,15 +0,0 @@
-{
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "[CLS]",
-  "do_basic_tokenize": true,
-  "do_lower_case": true,
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "never_split": null,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
-  "unk_token": "[UNK]"
-}

package/.env.example DELETED Viewed

File without changes

package/bin/cli.js DELETED Viewed

@@ -1,58 +0,0 @@
-#!/usr/bin/env node
-const yargs = require("yargs");
-const { hideBin } = require("yargs/helpers");
-const resolver = require("../src/index.js");
-const argv = yargs(hideBin(process.argv))
-  .usage("Usage: $0 <query> [options]")
-  .option("provider", {
-    type: "string",
-    describe: "LLM provider: local | openai | groq | anthropic",
-    default: "local",
-  })
-  .option("openai-key", { type: "string", describe: "OpenAI API key" })
-  .option("groq-key", { type: "string", describe: "Groq API key" })
-  .option("anthropic-key", { type: "string", describe: "Anthropic API key" })
-  .option("model", { type: "string", describe: "LLM model to use" })
-  .option("doc-root", { type: "string", describe: "Directory of documents" })
-  .option("stream", { type: "boolean", describe: "Stream output if supported", default: false })
-  .option("vector-backend", {
-    type: "string",
-    describe: "Vector backend to use: pgvector | memory | pinecone | redis",
-    default: "pgvector"
-  })
-  .demandCommand(1, "Please provide a query")
-  .help()
-  .argv;
-// Build context for resolver
-const context = {
-  query: argv._.join(" "),
-  doc_root: argv.docRoot,
-  stream: argv.stream,
-  vectorBackend: argv["vector-backend"], // NEW
-  options: {
-    provider: argv.provider,
-    openaiApiKey: argv["openai-key"] || process.env.OPENAI_API_KEY,
-    groqApiKey: argv["groq-key"] || process.env.GROQ_API_KEY,
-    anthropicApiKey: argv["anthropic-key"] || process.env.ANTHROPIC_API_KEY,
-    model: argv.model,
-  },
-  onToken: token => {
-    if (argv.stream) process.stdout.write(token);
-  },
-};
-(async () => {
-  try {
-    // Pass vectorBackend in the config
-    const result = await resolver("search", context);
-    if (!argv.stream) {
-      console.log("\n\n✅ Result:\n");
-      console.log(result.text, "\n");
-      console.log("Meta:", result.meta);
-    }
-  } catch (err) {
-    console.error("\n❌ Error running search:", err);
-  }
-})();

package/docs/sample1.txt DELETED Viewed

@@ -1 +0,0 @@
-Semantic search is a technique that improves search results by understanding the meaning of words in a query rather than relying solely on keyword matching.

package/docs/vacation policy DELETED Viewed

@@ -1,5 +0,0 @@
-# Vacation Policy 2025
-Full-time employees accrue 20 days of paid time off annually.
-Part-time employees accrue leave on a pro-rata basis.
-New employees begin accruing vacation immediately upon hire.

package/embeddings.json DELETED Viewed

@@ -1,3 +0,0 @@
-{
-  "d17dd8bfdf22150455121531195faaa533b90a80ae55ac2d8615e517ea25bf59": true
-}

package/test-doc-search-batch.js DELETED Viewed

@@ -1,36 +0,0 @@
-// test-doc-search-batch.js
-const docSearchResolver = require("./src/index.js");
-(async () => {
-  try {
-    const context = {
-      doc_root: "./docs",        // folder with .txt or .md files
-      vectorBackend: "memory",   // can also switch to "pgvector" if configured
-    };
-    const queries = [
-      "Semantic search",
-      "Vacation policy",
-      "Employee onboarding",
-      "Leave requests",
-      "HR compliance"
-    ];
-    console.log("🔎 Running batch doc-search...");
-    for (const query of queries) {
-      const action = `Ask doc-search "${query}"`;
-      const result = await docSearchResolver(action, context);
-      console.log("\n====================================");
-      console.log(`Query: "${query}"`);
-      console.log("Text:\n", result.text || "(No matches found)");
-      console.log("Meta:", result.meta);
-      console.log("====================================");
-    }
-    console.log("\n✅ Batch search complete!");
-  } catch (err) {
-    console.error("❌ Batch doc-search test failed:", err);
-  }
-})();

package/test-doc-search.js DELETED Viewed

@@ -1,40 +0,0 @@
-// test-doc-search.js
-const resolver = require('./src/resolver');
-async function testDocSearch() {
-  console.log('🧪 Testing doc-search resolver...');
-  const fs = require('fs');
-  const path = require('path');
-  const testDir = './test-docs';
-  if (!fs.existsSync(testDir)) fs.mkdirSync(testDir);
-  fs.writeFileSync(path.join(testDir, 'policy.md'),
-    '# Vacation Policy\nFull-time employees get 20 days PTO.\nNew hires accrue immediately.'
-  );
-  try {
-    const result = await resolver('Ask doc-search "vacation policy"', {
-      doc_root: './test-docs',
-      topK: 3,
-      minScore: 0
-    });
-    console.log('✅ SUCCESS!');
-    if (result?.matches?.length > 0) {
-      const fullText = result.matches.map(m => m.content).join('\n\n');
-      console.log('📄 Retrieved text length:', fullText.length);
-      console.log('🔍 First 100 chars:', fullText.substring(0, 100));
-    } else {
-      console.log('⚠️ No matches found (check minScore or embedding quality)');
-    }
-    fs.rmSync(testDir, { recursive: true, force: true });
-  } catch (err) {
-    console.error('❌ FAILED:', err);
-    if (fs.existsSync(testDir)) fs.rmSync(testDir, { recursive: true });
-    process.exit(1);
-  }
-}
-testDocSearch();

package/test-embed.js DELETED Viewed

@@ -1,10 +0,0 @@
-// test-embed.js
-const embedder = require("./src/embeddings/local");
-async function test() {
-  console.log("Model dimension:", embedder.getDimension());
-  const vector = await embedder.embed("hello world");
-  console.log("Embedding result:", vector?.length, vector);
-}
-test().catch(console.error);

package/test-single-doc.js DELETED Viewed

@@ -1,32 +0,0 @@
-// test-single-doc.js
-const path = require("path");
-const { LocalEmbedding } = require("./embeddings/local.js");
-const { chunkText } = require("./utils/chunker.js");
-const VectorRouter = require("./adapters/vectorRouter");
-(async () => {
-  const embedder = new LocalEmbedding();
-  const docPath = path.join(process.cwd(), "docs", "sample1.txt");
-  const fs = require("fs");
-  const content = fs.readFileSync(docPath, "utf8");
-  const chunks = chunkText(content, 500);
-  console.log(`Document split into ${chunks.length} chunk(s)`);
-  const vectorStore = VectorRouter.create({ backend: "memory", dimension: embedder.getDimension() });
-  for (let i = 0; i < chunks.length; i++) {
-    const vector = await embedder.embed(chunks[i]);
-    console.log(`Chunk ${i} embedding first 5 dims:`, vector.slice(0, 5));
-    await vectorStore.upsert({ id: `sample1:${i}`, vector, content: chunks[i], source: "file:sample1.txt" });
-  }
-  const query = "Semantic search";
-  const queryVector = await embedder.embed(query);
-  const results = await vectorStore.query(queryVector, { topK: 5 });
-  results.forEach((r, idx) => {
-    console.log(`Result ${idx}: score=${r.score.toFixed(3)} content=${r.content.substring(0, 50)}...`);
-  });
-})();