@o-lang/semantic-doc-search 1.0.31 → 1.0.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/resolver.js +21 -13
package/package.json
CHANGED
package/src/resolver.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
const VectorRouter = require("./adapters/vectorRouter");
|
|
2
|
-
const embedder = require("./embeddings/local"); //
|
|
2
|
+
const embedder = require("./embeddings/local"); // singleton embedder
|
|
3
3
|
const { extractQuery } = require("./utils/extractQuery");
|
|
4
4
|
const { formatResults } = require("./utils/formatResults");
|
|
5
5
|
const fs = require("fs");
|
|
@@ -28,6 +28,15 @@ function hashContent(str) {
|
|
|
28
28
|
return crypto.createHash("sha256").update(str).digest("hex");
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
+
/**
 * Clean text for embedding (defensive).
 *
 * Non-string input yields an empty string so callers can use a simple
 * falsy check to skip unusable values.
 *
 * @param {*} text - Candidate text; anything that is not a string is rejected.
 * @returns {string} The text without surrounding whitespace and without one
 *   wrapping quote character (`"` or `'`) at each end; `""` for non-strings.
 */
function sanitizeTextForEmbedding(text) {
  if (typeof text !== "string") return "";
  // Trim BEFORE stripping quotes: the ^/$ anchors only see a quote that sits
  // at the very edge, so padded input such as `  "hello"  ` would otherwise
  // keep its quotes. Trim again afterwards to drop whitespace that was
  // inside the quotes.
  return text
    .trim()
    .replace(/^["']|["']$/g, "")
    .trim();
}
|
|
39
|
+
|
|
31
40
|
/**
|
|
32
41
|
* Semantic Doc Search Resolver
|
|
33
42
|
*/
|
|
@@ -35,7 +44,9 @@ async function resolver(action, context = {}) {
|
|
|
35
44
|
if (typeof action !== "string") return;
|
|
36
45
|
if (!action.toLowerCase().startsWith("ask doc-search")) return;
|
|
37
46
|
|
|
38
|
-
|
|
47
|
+
let query = extractQuery(action);
|
|
48
|
+
query = sanitizeTextForEmbedding(query);
|
|
49
|
+
if (!query) return { text: "(Empty query)", meta: { matches: 0 } };
|
|
39
50
|
|
|
40
51
|
// Vector backend
|
|
41
52
|
const vectorStore = VectorRouter.create(context);
|
|
@@ -62,17 +73,14 @@ async function resolver(action, context = {}) {
|
|
|
62
73
|
for (const doc of context.documents) {
|
|
63
74
|
const chunks = doc.chunks || [doc.content];
|
|
64
75
|
for (let i = 0; i < chunks.length; i++) {
|
|
65
|
-
const text = chunks[i];
|
|
76
|
+
const text = sanitizeTextForEmbedding(chunks[i]);
|
|
77
|
+
if (!text) continue;
|
|
78
|
+
|
|
66
79
|
const hash = hashContent(text);
|
|
67
80
|
if (cache[hash]) continue; // Skip already ingested
|
|
68
81
|
|
|
69
82
|
const vector = await embedder.embed(text);
|
|
70
|
-
|
|
71
|
-
// ✅ Defensive: skip invalid vectors
|
|
72
|
-
if (!Array.isArray(vector) || vector.length !== embedder.getDimension() || vector.every(v => v === 0)) {
|
|
73
|
-
console.warn(`⚠️ Skipping invalid vector for doc ${doc.id}:${i}`);
|
|
74
|
-
continue;
|
|
75
|
-
}
|
|
83
|
+
if (!vector || vector.every(v => v === 0)) continue;
|
|
76
84
|
|
|
77
85
|
await vectorStore.upsert({
|
|
78
86
|
id: `${doc.id}:${i}`,
|
|
@@ -89,12 +97,12 @@ async function resolver(action, context = {}) {
|
|
|
89
97
|
|
|
90
98
|
// Embed query & search
|
|
91
99
|
const queryVector = await embedder.embed(query);
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
throw new Error("Query embedding invalid or not a proper array");
|
|
100
|
+
if (!queryVector || queryVector.every(v => v === 0)) {
|
|
101
|
+
console.warn("⚠️ Query embedding invalid");
|
|
102
|
+
return { text: "(Query could not be embedded)", meta: { matches: 0 } };
|
|
96
103
|
}
|
|
97
104
|
|
|
105
|
+
// Top-K + similarity threshold
|
|
98
106
|
const results = await vectorStore.query({
|
|
99
107
|
vector: queryVector,
|
|
100
108
|
topK: context.topK || 5,
|