@o-lang/semantic-doc-search 1.0.35 → 1.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@o-lang/semantic-doc-search",
-  "version": "1.0.35",
+  "version": "1.0.37",
   "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
   "main": "src/index.js",
   "type": "commonjs",
@@ -4,8 +4,18 @@ class VectorAdapter {
     this.dimension = config.dimension || null;
   }
 
+  /**
+   * Validates vector input.
+   * Accepts:
+   *   - Plain arrays (e.g., [0.1, -0.2, ...])
+   *   - TypedArrays (e.g., Float32Array)
+   */
   validateVector(vector) {
-    if (!Array.isArray(vector)) throw new Error("Vector must be an array");
+    // Allow both Array and TypedArray (Float32Array, etc.)
+    if (!Array.isArray(vector) && !ArrayBuffer.isView(vector)) {
+      throw new Error("Vector must be an array or TypedArray");
+    }
+
     if (this.dimension && vector.length !== this.dimension) {
       throw new Error(
         `Vector dimension mismatch: expected ${this.dimension}, got ${vector.length}`
@@ -25,4 +35,4 @@ class VectorAdapter {
   }
 }
 
-module.exports = VectorAdapter;
+module.exports = VectorAdapter;
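
The practical effect of the widened check is that Float32Array embeddings (the usual output of local embedding models) no longer need an Array.from() copy before validation. A minimal sketch of the patched behavior, assuming only the constructor and dimension logic visible in the hunks above:

const adapter = new (class VectorAdapter {
  constructor(config = {}) {
    this.dimension = config.dimension || null;
  }

  validateVector(vector) {
    // ArrayBuffer.isView() returns true for every TypedArray view,
    // so Float32Array (and friends) now pass the type check.
    if (!Array.isArray(vector) && !ArrayBuffer.isView(vector)) {
      throw new Error("Vector must be an array or TypedArray");
    }
    if (this.dimension && vector.length !== this.dimension) {
      throw new Error(
        `Vector dimension mismatch: expected ${this.dimension}, got ${vector.length}`
      );
    }
  }
})({ dimension: 3 });

adapter.validateVector([0.1, -0.2, 0.3]);            // ok: plain array
adapter.validateVector(new Float32Array([1, 2, 3])); // ok as of 1.0.37

One caveat: ArrayBuffer.isView() is also true for DataView, which has no .length property, so a DataView passed with a configured dimension fails the dimension check ("got undefined") rather than the type check.
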
package/src/index.js CHANGED
@@ -1,256 +1,9 @@
-const fs = require("fs");
-const path = require("path");
-const embedder = require("./embeddings/local.js"); // ✅ singleton embedder
-const { chunkText } = require("./utils/chunker.js");
-const { extractKeywords } = require("./utils/extractText.js");
-const { highlightMatches } = require("./utils/highlight.js");
-const VectorRouter = require("./adapters/vectorRouter");
-const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
-
-const CACHE_PATH = path.join(process.cwd(), "embeddings.json");
-
-/* ---------------- UTIL ---------------- */
-
-function safeResolve(base, userPath) {
-  const resolved = path.resolve(base, userPath);
-  if (!resolved.startsWith(path.resolve(base))) {
-    throw new Error("Path traversal detected");
-  }
-  return resolved;
-}
-
-function loadCache() {
-  try {
-    if (fs.existsSync(CACHE_PATH)) {
-      return JSON.parse(fs.readFileSync(CACHE_PATH, "utf8")) || {};
-    }
-  } catch {}
-  return {};
-}
-
-function saveCache(cache) {
-  try {
-    fs.writeFileSync(CACHE_PATH, JSON.stringify(cache, null, 2));
-  } catch {}
-}
-
-/* ---------------- DATABASE ADAPTER ---------------- */
-
-class DatabaseAdapter {
-  constructor() {
-    this.initialized = false;
-  }
-
-  async initialize(context) {
-    if (this.initialized) return;
-
-    if (context.MONGO_URI) await this.initMongo(context);
-    else if (context.db_path) await this.initSQLite(context);
-    else if (context.POSTGRES_URL) await this.initPostgres(context);
-
-    this.initialized = true;
-  }
-
-  async initSQLite(context) {
-    const Database = require("better-sqlite3");
-    const dbPath = context.db_path || "./database.db";
-    this.sqlite = new Database(dbPath, { readonly: true });
-  }
-
-  async initMongo(context) {
-    const { MongoClient } = require("mongodb");
-    this.mongo = new MongoClient(context.MONGO_URI);
-    await this.mongo.connect();
-  }
-
-  async initPostgres(context) {
-    const { Pool } = require("pg");
-    this.pg = new Pool({ connectionString: context.POSTGRES_URL });
-  }
-
-  async queryDocuments(context) {
-    const table = context.db_table || "documents";
-    const idCol = context.db_id_column || "id";
-    const contentCol = context.db_content_column || "content";
-
-    if (this.mongo) {
-      const rows = await this.mongo.db().collection(table).find({}).toArray();
-      return rows.map(r => ({
-        id: r._id.toString(),
-        content: r[contentCol] || "",
-        source: `mongodb:${table}`,
-      }));
-    }
-
-    if (this.sqlite) {
-      const rows = this.sqlite
-        .prepare(`SELECT ${idCol}, ${contentCol} FROM ${table}`)
-        .all();
-      return rows.map(r => ({
-        id: r[idCol],
-        content: r[contentCol],
-        source: `sqlite:${table}`,
-      }));
-    }
-
-    if (this.pg) {
-      const res = await this.pg.query(
-        `SELECT ${idCol}, ${contentCol} FROM ${table}`
-      );
-      return res.rows.map(r => ({
-        id: r[idCol],
-        content: r[contentCol],
-        source: `postgres:${table}`,
-      }));
-    }
-
-    return [];
-  }
-}
-
-/* ---------------- DOCUMENT LOADING ---------------- */
-
-async function loadAllDocuments(context) {
-  const docs = [];
-  const db = new DatabaseAdapter();
-
-  try {
-    await db.initialize(context);
-    docs.push(...(await db.queryDocuments(context)));
-  } catch {}
-
-  const baseDir = context.doc_root
-    ? safeResolve(process.cwd(), context.doc_root)
-    : path.join(process.cwd(), "docs");
-
-  if (fs.existsSync(baseDir)) {
-    const files = fs
-      .readdirSync(baseDir)
-      .filter(f => f.endsWith(".txt") || f.endsWith(".md"));
-
-    for (const file of files) {
-      docs.push({
-        id: file,
-        content: fs.readFileSync(path.join(baseDir, file), "utf8"),
-        source: `file:${file}`,
-      });
-    }
-  }
-
-  return docs;
-}
-
-/* ---------------- HYBRID VECTOR SEARCH ---------------- */
-
-async function performHybridDocQA(query, context) {
-  const cache = loadCache();
-
-  const MIN_SCORE = context.minScore ?? 0.75;
-  const topK = context.topK ?? 5;
-
-  const vectorStore = VectorRouter.create({
-    backend: context.vectorBackend || "memory",
-    dimension: embedder.getDimension(),
-    ...context,
-  });
-
-  console.log(
-    "🧠 Vector store methods:",
-    Object.getOwnPropertyNames(Object.getPrototypeOf(vectorStore))
-  );
-
-  const documents = await loadAllDocuments(context);
-  console.log("📄 Documents loaded:", documents.length);
-
-  if (!documents.length) return { text: "(No documents found)", meta: { matches: 0 } };
-
-  // Multi-document ingestion
-  for (const doc of documents) {
-    const chunks = chunkText(doc.content, 500);
-    console.log(`📦 ${doc.id} split into ${chunks.length} chunks`);
-
-    for (let i = 0; i < chunks.length; i++) {
-      console.log("🧩 Chunk to embed:", chunks[i]?.substring(0, 100));
-
-      const vector = await embedder.embed(chunks[i]);
-      if (!vector || vector.every(v => v === 0)) {
-        console.warn("⚠️ Zero or invalid embedding, skipping chunk");
-        continue;
-      }
-
-      await vectorStore.upsert({
-        id: `${doc.id}:${i}`,
-        vector,
-        content: chunks[i],
-        source: doc.source,
-      });
-
-      console.log(`✅ Upserted ${doc.id}:${i}`);
-    }
-  }
-
-  saveCache(cache);
-
-  // Embed the query
-  const queryVector = await embedder.embed(query);
-  if (!queryVector || queryVector.every(v => v === 0)) {
-    console.warn("⚠️ Query embedding invalid");
-    return { text: "(Query could not be embedded)", meta: { matches: 0 } };
-  }
-
-  // Top-K + similarity threshold
-  const results = await vectorStore.query(queryVector, { topK });
-  const filtered = results.filter(r => r.score >= MIN_SCORE);
-
-  console.log(`🔍 Search results: ${filtered.length} (after applying minScore=${MIN_SCORE})`);
-
-  if (!filtered.length) {
-    return { text: "(No relevant match found)", meta: { matches: 0 } };
-  }
-
-  return {
-    text: highlightMatches(
-      filtered.map(r => r.content).join("\n\n"),
-      extractKeywords(query)
-    ),
-    meta: { matches: filtered.length },
-  };
-}
-
-
-/* ---------------- PGVECTOR SEARCH ---------------- */
-
-async function performPgVectorSearch(query, context) {
-  const adapter = new PgVectorAdapter({
-    POSTGRES_URL: context.POSTGRES_URL,
-  });
-
-  const vector = await embedder.embed(query);
-  const results = await adapter.search(vector, 5);
-  await adapter.close();
-
-  return {
-    text: results.map(r => r.content).join("\n\n"),
-    meta: { matches: results.length },
-  };
-}
-
-/* ---------------- ROUTER ---------------- */
-
-async function performDocQA(query, context) {
-  if (context.POSTGRES_URL) {
-    return performPgVectorSearch(query, context);
-  }
-  return performHybridDocQA(query, context);
-}
-
-/* ---------------- O-LANG RESOLVER ---------------- */
-
-const semanticResolver = require("./resolver"); // ✅ IMPORT NEW FILE
+// index.js (6 lines)
+const semanticResolver = require("./resolver");
 
 async function docSearchResolver(action, context) {
-  return semanticResolver(action, context);
+  return await semanticResolver(action, context);
 }
 
 docSearchResolver.resolverName = "doc-search";
-module.exports = docSearchResolver;
+module.exports = docSearchResolver;
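
The index.js rewrite moves the whole pipeline (document loading, chunking, embedding, hybrid and pgvector search) behind ./resolver, leaving a thin entry point. A usage sketch under stated assumptions — only the (action, context) arity is confirmed by this diff; the action shape is a guess, and the context keys (doc_root, topK, minScore) are carried over from the removed code:

const docSearchResolver = require("@o-lang/semantic-doc-search");

(async () => {
  // Hypothetical action/context; field names below are assumptions,
  // not part of the published API surface shown in this diff.
  const result = await docSearchResolver(
    { query: "how does hybrid search rank results?" },
    { doc_root: "docs", topK: 5, minScore: 0.75 }
  );
  console.log(result.text);
  console.log(result.meta); // the old implementation returned { matches: n }
})();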