@o-lang/semantic-doc-search 1.0.43 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/resolver.js +30 -44
- package/src/utils/extractQuery.js +24 -8
package/package.json
CHANGED
package/src/resolver.js
CHANGED
|
@@ -9,7 +9,7 @@ const crypto = require("crypto");
|
|
|
9
9
|
const CACHE_PATH = path.join(process.cwd(), "embeddings.json");
|
|
10
10
|
|
|
11
11
|
// ─────────────────────────────────────────────
|
|
12
|
-
// Helpers
|
|
12
|
+
// Helpers
|
|
13
13
|
// ─────────────────────────────────────────────
|
|
14
14
|
function loadCache() {
|
|
15
15
|
try {
|
|
@@ -36,94 +36,80 @@ function hashText(str) {
|
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
// ─────────────────────────────────────────────
|
|
39
|
-
// 🔥 MAIN RESOLVER
|
|
39
|
+
// 🔥 MAIN RESOLVER - Simplified: action type = resolver name
|
|
40
|
+
// Format: Action vector.search "arg1" "arg2" ...
|
|
41
|
+
// - 1 arg = ingest: "{doc_root}"
|
|
42
|
+
// - 2+ args = search: "{query}" "{doc_root}"
|
|
40
43
|
// ─────────────────────────────────────────────
|
|
41
44
|
/**
 * Resolver for `vector.search` actions.
 *
 * The number of double-quoted arguments in the action string selects the mode:
 *   - exactly one  → ingest: `Action vector.search "{doc_root}"`
 *   - two or more  → search: `Action vector.search "{query}" "{doc_root}"`
 *     (the search path only reads the first argument, the query)
 *
 * @param {string} action - Raw action string.
 * @param {object} [context={}] - Resolver context. This function reads
 *   `doc_root`, `topK`, `POSTGRES_URL` and `REDIS_URL` from it, and passes the
 *   whole object to `VectorRouter.create`.
 * @returns {Promise<object|undefined>}
 *   - ingest: `{ inserted, doc_root }`
 *   - search: the result of `formatResults(results, query)`, or
 *     `{ text: "", matches: [] }` when the sanitized query is empty
 *   - `undefined` when `action` is not a string or has no quoted arguments
 */
async function resolver(action, context = {}) {
  if (typeof action !== "string") return;

  // Extract ALL quoted args from the action string.
  const args = [...action.matchAll(/"([^"]*)"/g)].map((m) => m[1]);

  // Reject unknown formats BEFORE acquiring the vector store, so nothing is
  // left open on this path.
  if (args.length === 0) {
    console.warn(`⚠️ Unknown vector.search action format: "${action}"`);
    return;
  }

  const vectorStore = VectorRouter.create(context);

  try {
    const getEmbedFn = await embedder({ dimension: 384 });
    // The hash cache is only consulted when a persistent backend is configured.
    const useCache = !!context.POSTGRES_URL || !!context.REDIS_URL;
    const cache = useCache ? loadCache() : {};

    // =====================================================
    // INGEST: 1 arg = doc_root
    // =====================================================
    if (args.length === 1) {
      let inserted = 0;
      // An empty quoted arg ("") falls back to the context, then the default.
      const ingestRoot = args[0] || context.doc_root || "./docs";

      if (fs.existsSync(ingestRoot)) {
        // Resolve the chunker once rather than once per file.
        const { chunkText } = require("./utils/chunker");

        for (const file of fs.readdirSync(ingestRoot)) {
          const fullPath = path.join(ingestRoot, file);
          // Only direct child files are ingested; sub-directories are skipped.
          if (!fs.statSync(fullPath).isFile()) continue;

          const content = fs.readFileSync(fullPath, "utf8");
          if (!content) continue;

          const chunks = chunkText(content, 500, 50);
          for (let i = 0; i < chunks.length; i++) {
            const text = sanitizeTextForEmbedding(chunks[i]);
            if (!text) continue;

            // Skip chunks whose hash is already recorded in the cache.
            const hash = hashText(text);
            if (useCache && cache[hash]) continue;

            const rawVector = await getEmbedFn(text);
            await vectorStore.upsert({
              id: `${file}:${i}`,
              vector: Array.from(rawVector),
              content: text,
              source: `file:${file}`,
            });

            if (useCache) cache[hash] = true;
            inserted++;
          }
        }
      }

      if (useCache) saveCache(cache);
      return { inserted, doc_root: ingestRoot };
    }

    // =====================================================
    // SEARCH: 2+ args = query + doc_root
    // =====================================================
    const query = sanitizeTextForEmbedding(args[0]);
    if (!query) return { text: "", matches: [] };

    const rawQueryVector = await getEmbedFn(query);
    const results = await vectorStore.query(Array.from(rawQueryVector), {
      topK: context.topK || 5,
    });
    return formatResults(results, query);
  } finally {
    // Single release point: runs on every return above and on any thrown
    // error, so the store is never left open.
    if (vectorStore.close) await vectorStore.close();
  }
}
|
|
125
110
|
|
|
126
|
-
|
|
127
|
-
resolver.
|
|
111
|
+
// ✅ Must match workflow's "Allow resolvers: - vector.search"
// Metadata is attached as properties on the exported resolver function.
// NOTE(review): the version string is hard-coded and differs from the package
// version shown for this release (1.1.1) — confirm whether it should track
// package.json.
Object.assign(resolver, {
  resolverName: "vector.search",
  version: "1.0.42",
});

module.exports = resolver;
|
|
@@ -1,18 +1,34 @@
|
|
|
1
1
|
/**
 * Extracts the query from an O-Lang action.
 *
 * Supports formats:
 *   - Legacy: `Ask doc-search: vacation policy`
 *   - New:    `Action doc-search "vector.search" "query" "./docs"`
 *
 * @param {string} action - Raw action string.
 * @returns {string} The trimmed query text.
 * @throws {Error} If `action` is empty or not a string, or matches neither format.
 */
function extractQuery(action) {
  if (!action || typeof action !== "string") {
    throw new Error("Invalid doc-search action format: action is not a string");
  }

  // LEGACY FORMAT (priority): an action that starts with "Ask doc-search:" is
  // free text after the colon. It must be checked before the quoted-argument
  // rule, otherwise a legacy query that itself contains two quoted phrases
  // would be reduced to its second phrase.
  const prefixedLegacy = action.match(/^\s*ask doc-search\s*:\s*(.+)$/i);
  if (prefixedLegacy) {
    return prefixedLegacy[1].trim();
  }

  // NEW FORMAT: "actionType" "query" "doc_root" — the query is the second
  // quoted string.
  const quotedMatches = [...action.matchAll(/"([^"]*)"/g)].map((m) => m[1]);
  if (quotedMatches.length >= 2) {
    return quotedMatches[1].trim();
  }

  // LEGACY FORMAT (lenient): colon optional, phrase may appear anywhere.
  const legacyMatch = action.match(/ask doc-search\s*:?\s*(.+)$/i);
  if (legacyMatch && legacyMatch[1]) {
    return legacyMatch[1].trim();
  }

  // Neither format matched.
  throw new Error(`Invalid doc-search action format: "${action}"`);
}
|
|
15
31
|
|
|
16
32
|
// Public API of this module: a single named export.
module.exports = { extractQuery };
|