@o-lang/semantic-doc-search 1.0.31 → 1.0.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/resolver.js +21 -13
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@o-lang/semantic-doc-search",
3
- "version": "1.0.31",
3
+ "version": "1.0.33",
4
4
  "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
5
5
  "main": "src/index.js",
6
6
  "type": "commonjs",
package/src/resolver.js CHANGED
@@ -1,5 +1,5 @@
1
1
  const VectorRouter = require("./adapters/vectorRouter");
2
- const embedder = require("./embeddings/local"); // singleton embedder
2
+ const embedder = require("./embeddings/local"); // singleton embedder
3
3
  const { extractQuery } = require("./utils/extractQuery");
4
4
  const { formatResults } = require("./utils/formatResults");
5
5
  const fs = require("fs");
@@ -28,6 +28,15 @@ function hashContent(str) {
28
28
  return crypto.createHash("sha256").update(str).digest("hex");
29
29
  }
30
30
 
31
+ /**
32
+ * Clean text for embedding (defensive)
33
+ */
34
+ function sanitizeTextForEmbedding(text) {
35
+ if (typeof text !== "string") return "";
36
+ // Remove wrapping quotes and extra whitespace
37
+ return text.replace(/^["']|["']$/g, "").trim();
38
+ }
39
+
31
40
  /**
32
41
  * Semantic Doc Search Resolver
33
42
  */
@@ -35,7 +44,9 @@ async function resolver(action, context = {}) {
35
44
  if (typeof action !== "string") return;
36
45
  if (!action.toLowerCase().startsWith("ask doc-search")) return;
37
46
 
38
- const query = extractQuery(action);
47
+ let query = extractQuery(action);
48
+ query = sanitizeTextForEmbedding(query);
49
+ if (!query) return { text: "(Empty query)", meta: { matches: 0 } };
39
50
 
40
51
  // Vector backend
41
52
  const vectorStore = VectorRouter.create(context);
@@ -62,17 +73,14 @@ async function resolver(action, context = {}) {
62
73
  for (const doc of context.documents) {
63
74
  const chunks = doc.chunks || [doc.content];
64
75
  for (let i = 0; i < chunks.length; i++) {
65
- const text = chunks[i];
76
+ const text = sanitizeTextForEmbedding(chunks[i]);
77
+ if (!text) continue;
78
+
66
79
  const hash = hashContent(text);
67
80
  if (cache[hash]) continue; // Skip already ingested
68
81
 
69
82
  const vector = await embedder.embed(text);
70
-
71
- // ✅ Defensive: skip invalid vectors
72
- if (!Array.isArray(vector) || vector.length !== embedder.getDimension() || vector.every(v => v === 0)) {
73
- console.warn(`⚠️ Skipping invalid vector for doc ${doc.id}:${i}`);
74
- continue;
75
- }
83
+ if (!vector || vector.every(v => v === 0)) continue;
76
84
 
77
85
  await vectorStore.upsert({
78
86
  id: `${doc.id}:${i}`,
@@ -89,12 +97,12 @@ async function resolver(action, context = {}) {
89
97
 
90
98
  // Embed query & search
91
99
  const queryVector = await embedder.embed(query);
92
-
93
- // Defensive: validate query vector
94
- if (!Array.isArray(queryVector) || queryVector.length !== embedder.getDimension()) {
95
- throw new Error("Query embedding invalid or not a proper array");
100
+ if (!queryVector || queryVector.every(v => v === 0)) {
101
+ console.warn("⚠️ Query embedding invalid");
102
+ return { text: "(Query could not be embedded)", meta: { matches: 0 } };
96
103
  }
97
104
 
105
+ // Top-K + similarity threshold
98
106
  const results = await vectorStore.query({
99
107
  vector: queryVector,
100
108
  topK: context.topK || 5,