@o-lang/semantic-doc-search 1.0.6 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/embeddings/local.js +6 -3
- package/src/llm/anthropic.js +4 -2
- package/src/llm/groq.js +8 -5
- package/src/llm/openai.js +4 -2
- package/src/llm/router.js +7 -4
- package/src/server/streamingHandler.js +15 -8
- package/src/utils/chunker.js +8 -19
- package/src/utils/extractText.js +10 -8
- package/src/utils/highlight.js +13 -22
- package/src/utils/similarity.js +4 -2
- package/src/utils/sseStream.js +4 -2
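
Taken together, the changes below convert every touched module from ES-module syntax (import/export) to CommonJS (require/module.exports). A minimal consumer sketch of the 1.0.7 layout follows; the deep-require paths are assumptions for illustration, not something this diff documents:

// Hypothetical CommonJS consumption of the converted modules (paths are assumed).
const { chunkText } = require("@o-lang/semantic-doc-search/src/utils/chunker.js");
const { cosine } = require("@o-lang/semantic-doc-search/src/utils/similarity.js");

const chunks = chunkText("some long document text", 1000, 200);
console.log(chunks.length, cosine([1, 2, 3], [1, 2, 3]));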
package/package.json
CHANGED
package/src/embeddings/local.js
CHANGED
@@ -1,4 +1,5 @@
-import crypto from "crypto";
+// src/embeddings/local.js
+const crypto = require("crypto");
 
 /**
  * LocalEmbedding
@@ -6,7 +7,7 @@ import crypto from "crypto";
  * Each string will produce a consistent vector based on a hash.
  * Note: Not semantic, just a placeholder for testing.
  */
-export class LocalEmbedding {
+class LocalEmbedding {
   constructor(dim = 512) {
     this.dim = dim;
   }
@@ -40,7 +41,7 @@ export class LocalEmbedding {
  * Convenience function for index.js
  * Retries local embedding generation (mostly placeholder, but keeps API compatible)
  */
-export async function createEmbeddingWithRetry(text, options = {}, retries = 1) {
+async function createEmbeddingWithRetry(text, options = {}, retries = 1) {
   const embedder = new LocalEmbedding();
   for (let attempt = 1; attempt <= retries; attempt++) {
     try {
@@ -50,3 +51,5 @@ export async function createEmbeddingWithRetry(text, options = {}, retries = 1)
     }
   }
 }
+
+module.exports = { LocalEmbedding, createEmbeddingWithRetry };
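
For context, a minimal sketch of consuming the converted module under CommonJS; the relative path and the shape of the resolved embedding are assumptions, not shown in this diff:

// Hypothetical usage of the CommonJS exports added in 1.0.7.
const { LocalEmbedding, createEmbeddingWithRetry } = require("./src/embeddings/local.js");

(async () => {
  const embedder = new LocalEmbedding(512);                     // dim defaults to 512 per the constructor
  const vector = await createEmbeddingWithRetry("hello world"); // retries defaults to 1
  console.log(embedder.dim, Array.isArray(vector));             // exact return shape is not visible in the hunks
})();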
package/src/llm/anthropic.js
CHANGED
@@ -1,10 +1,10 @@
 // src/llm/anthropic.js
-
+const Anthropic = require("@anthropic-ai/sdk");
 
 /**
  * Anthropic LLM Provider
  */
-export default class AnthropicProvider {
+class AnthropicProvider {
   constructor({ apiKey }) {
     if (!apiKey) console.warn("⚠️ WARNING: ANTHROPIC_API_KEY missing");
     this.client = new Anthropic({ apiKey });
@@ -34,3 +34,5 @@ export default class AnthropicProvider {
     return { done: true };
   }
 }
+
+module.exports = AnthropicProvider;
package/src/llm/groq.js
CHANGED
@@ -1,10 +1,11 @@
 // src/llm/groq.js
-
-// Multi-tenant safe, streaming-ready
+const Groq = require("groq-sdk");
 
-
-
-export default class GroqProvider {
+/**
+ * Groq LLM Provider (groq-sdk@0.5.0)
+ * Multi-tenant safe, streaming-ready
+ */
+class GroqProvider {
   /**
    * @param {Object} options
    * @param {string} options.apiKey - User-provided Groq API key
@@ -50,3 +51,5 @@ export default class GroqProvider {
     return { done: true };
   }
 }
+
+module.exports = GroqProvider;
package/src/llm/openai.js
CHANGED
@@ -1,11 +1,11 @@
 // src/llm/openai.js
-
+const OpenAI = require("openai");
 
 /**
  * OpenAI LLM Provider
  * Supports GPT-4, GPT-4o-mini, GPT-4o, and any Chat model
  */
-export default class OpenAIProvider {
+class OpenAIProvider {
   constructor({ apiKey }) {
     if (!apiKey) console.warn("⚠️ WARNING: OPENAI_API_KEY missing");
     this.client = new OpenAI({ apiKey });
@@ -41,3 +41,5 @@ export default class OpenAIProvider {
     return { done: true };
   }
 }
+
+module.exports = OpenAIProvider;
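
All three providers keep the same constructor pattern shown above. A hedged sketch of direct use; the stream({ prompt, onToken }) call shape is taken from the streaming handler further down, and the prompt text is illustrative:

// Hypothetical direct use of a provider class (CommonJS, 1.0.7).
const OpenAIProvider = require("./src/llm/openai.js");

(async () => {
  const llm = new OpenAIProvider({ apiKey: process.env.OPENAI_API_KEY }); // warns if the key is missing
  const result = await llm.stream({
    prompt: "Summarize this document",
    onToken: (token) => process.stdout.write(token),
  });
  console.log(result); // { done: true } per the providers' return value
})();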
package/src/llm/router.js
CHANGED
@@ -1,8 +1,9 @@
-
-
-
+// src/llm/router.js
+const OpenAIProvider = require("./openai.js");
+const GroqProvider = require("./groq.js");
+const AnthropicProvider = require("./anthropic.js");
 
-export function createLLM({ provider, openaiApiKey, groqApiKey, anthropicApiKey } = {}) {
+function createLLM({ provider, openaiApiKey, groqApiKey, anthropicApiKey } = {}) {
   switch ((provider || "").toLowerCase()) {
     case "openai":
       if (!openaiApiKey) throw new Error("Missing OpenAI API key");
@@ -20,3 +21,5 @@ export function createLLM({ provider, openaiApiKey, groqApiKey, anthropicApiKey
       throw new Error(`Unsupported LLM provider: ${provider}`);
   }
 }
+
+module.exports = { createLLM };
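
A sketch of the factory this module now exports; only the "openai" case is visible in the hunk above, so the example sticks to that provider:

// Hypothetical use of createLLM (CommonJS, 1.0.7).
const { createLLM } = require("./src/llm/router.js");

const llm = createLLM({
  provider: "openai",
  openaiApiKey: process.env.OPENAI_API_KEY,
});
// Unsupported provider strings throw: `Unsupported LLM provider: ${provider}`.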
package/src/server/streamingHandler.js
CHANGED
@@ -1,8 +1,7 @@
 // src/server/streamingHandler.js
-
-
-
-import { llmRouter } from "../llm/router.js";
+const express = require("express");
+const { initSSE, sendSSE } = require("../utils/sseStream.js");
+const { createLLM } = require("../llm/router.js");
 
 const router = express.Router();
 
@@ -18,11 +17,19 @@ router.post("/stream", async (req, res) => {
     // Initialize SSE
     initSSE(res);
 
-    // LLM
-
-
+    // Create LLM instance
+    const llm = createLLM({
+      provider,
+      openaiApiKey: options?.openaiApiKey,
+      groqApiKey: options?.groqApiKey,
+      anthropicApiKey: options?.anthropicApiKey
     });
 
+    // Stream the query response
+    await llm.stream({ prompt: query, onToken: (token) => {
+      sendSSE(res, { token });
+    }});
+
     // End stream
     sendSSE(res, { done: true }, true);
 
@@ -32,4 +39,4 @@ router.post("/stream", async (req, res) => {
   }
 });
 
-
+module.exports = { router: router };
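
Because the route module now exports { router } instead of a default export, mounting it changes accordingly. A minimal server sketch; the mount path and port are assumptions:

// Hypothetical Express app wiring for the 1.0.7 export shape.
const express = require("express");
const { router: streamingRouter } = require("./src/server/streamingHandler.js");

const app = express();
app.use(express.json());          // the handler reads query/provider/options, presumably from the request body
app.use("/api", streamingRouter); // POST /api/stream answers with an SSE response
app.listen(3000);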
package/src/utils/chunker.js
CHANGED
@@ -1,27 +1,16 @@
 // src/utils/chunker.js
-
-
- * chunkText
- * Split text into overlapping chunks
- *
- * @param {string} text - the full text
- * @param {number} chunkSize - number of characters per chunk
- * @param {number} overlap - number of characters overlap between chunks
- * @returns string[] - array of text chunks
- */
-export function chunkText(text, chunkSize = 1000, overlap = 200) {
-  if (!text || !text.trim()) return [];
-
+function chunkText(text, chunkSize, overlap) {
+  if (!text) return [];
   const chunks = [];
   let start = 0;
-
+
   while (start < text.length) {
     const end = Math.min(start + chunkSize, text.length);
-
-
-
-    start += chunkSize - overlap; // move forward with overlap
+    chunks.push(text.slice(start, end));
+    start += chunkSize - overlap;
   }
-
+
   return chunks;
 }
+
+module.exports = { chunkText };
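
The rewritten chunkText no longer supplies default values for chunkSize and overlap, so callers now pass both explicitly. A small sketch; the sizes are illustrative:

// Hypothetical call matching the new signature (CommonJS, 1.0.7).
const { chunkText } = require("./src/utils/chunker.js");

const chunks = chunkText("a".repeat(2500), 1000, 200);
console.log(chunks.length); // 4 windows of up to 1000 chars, advancing 800 chars per step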
package/src/utils/extractText.js
CHANGED
@@ -1,8 +1,8 @@
-
-
-
-
-
+// src/utils/extractText.js
+const fs = require("fs");
+const path = require("path");
+const { JSDOM } = require("jsdom");
+const pdfParse = require("pdf-parse");
 
 /**
  * extractTextFromFile
@@ -13,7 +13,7 @@ import { readFile } from "fs/promises";
  * - .pdf
  * - .docx (minimal)
  */
-export async function extractTextFromFile(filePath) {
+async function extractTextFromFile(filePath) {
   const ext = path.extname(filePath).toLowerCase();
 
   if (ext === ".txt" || ext === ".md") {
@@ -34,7 +34,7 @@ export async function extractTextFromFile(filePath) {
 
   if (ext === ".docx") {
     // Minimal extraction using ZIP (can be improved)
-    const
+    const StreamZip = require("node-stream-zip").default;
     const zip = new StreamZip.async({ file: filePath });
     const content = await zip.entryData("word/document.xml");
     await zip.close();
@@ -49,7 +49,7 @@ export async function extractTextFromFile(filePath) {
  * Basic keyword extraction for lexical matching
  * Returns array of lowercase words, stripped of punctuation
  */
-export function extractKeywords(text = "") {
+function extractKeywords(text = "") {
   if (!text) return [];
   return text
     .toLowerCase()
@@ -57,3 +57,5 @@ export function extractKeywords(text = "") {
     .split(/\s+/)
     .filter(Boolean);
 }
+
+module.exports = { extractTextFromFile, extractKeywords };
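
A hedged sketch of the two exports; the file path is hypothetical, and the .pdf/.docx branches rely on the jsdom, pdf-parse, and node-stream-zip requires visible above:

// Hypothetical usage of the CommonJS exports (1.0.7).
const { extractTextFromFile, extractKeywords } = require("./src/utils/extractText.js");

(async () => {
  const text = await extractTextFromFile("./docs/notes.md"); // .txt, .md, .pdf, .docx supported
  const keywords = extractKeywords(text);                    // lowercase tokens, punctuation stripped
  console.log(keywords.slice(0, 10));
})();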
package/src/utils/highlight.js
CHANGED
@@ -1,24 +1,15 @@
 // src/utils/highlight.js
-
-
-
-
-
-
-
-
-
-
-
-
-  let highlighted = text;
-
-  // escape special regex chars in keywords
-  const escapedKeywords = keywords.map(k => k.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
-
-  const pattern = new RegExp(`\\b(${escapedKeywords.join("|")})\\b`, "gi");
-
-  highlighted = highlighted.replace(pattern, "<mark>$1</mark>");
-
-  return highlighted;
+function highlightMatches(text, keywords) {
+  if (!text || !keywords || !keywords.length) return text;
+
+  let result = text;
+  keywords.forEach(keyword => {
+    if (!keyword) return;
+    const regex = new RegExp(`(${keyword})`, 'gi');
+    result = result.replace(regex, '**$1**');
+  });
+
+  return result;
 }
+
+module.exports = { highlightMatches };
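
The rewritten helper wraps matches in **…** markers rather than <mark> tags, and it no longer escapes regex metacharacters in the keywords. A short sketch of the new behavior:

// Hypothetical call against the 1.0.7 implementation.
const { highlightMatches } = require("./src/utils/highlight.js");

console.log(highlightMatches("The quick brown fox", ["quick", "fox"]));
// -> "The **quick** brown **fox**"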
package/src/utils/similarity.js
CHANGED
@@ -7,7 +7,7 @@
  * @param {number[]} b
  * @returns {number} similarity score between -1 and 1
  */
-export function cosine(a, b) {
+function cosine(a, b) {
   if (!a || !b || a.length !== b.length) return 0;
 
   let dot = 0, magA = 0, magB = 0;
@@ -30,7 +30,7 @@ export function cosine(a, b) {
  * @param {number[]} b
  * @returns {number} Euclidean distance
  */
-export function euclidean(a, b) {
+function euclidean(a, b) {
   if (!a || !b || a.length !== b.length) return Infinity;
 
   let sum = 0;
@@ -40,3 +40,5 @@ export function euclidean(a, b) {
 
   return Math.sqrt(sum);
 }
+
+module.exports = { cosine, euclidean };
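
A sketch of calling the functions through the new export object; the numeric expectations assume the standard cosine and Euclidean formulas that the surrounding JSDoc describes, since the full loop bodies fall outside these hunks:

// Hypothetical usage of the CommonJS exports (1.0.7).
const { cosine, euclidean } = require("./src/utils/similarity.js");

console.log(cosine([1, 0], [1, 0]));    // expected 1 for identical direction (assuming the standard formula)
console.log(euclidean([0, 0], [3, 4])); // expected 5 (assuming summed squared differences)
console.log(cosine([1, 2], [1, 2, 3])); // 0, because mismatched lengths short-circuit per the guard shown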
package/src/utils/sseStream.js
CHANGED
@@ -8,7 +8,7 @@
  * @param {string} data
  * @param {boolean} [end=false] - whether this is the final message
  */
-export function sendSSE(res, data, end = false) {
+function sendSSE(res, data, end = false) {
   res.write(`data: ${JSON.stringify(data)}\n\n`);
   if (end) res.write("event: end\ndata: [DONE]\n\n");
 }
@@ -19,7 +19,7 @@ export function sendSSE(res, data, end = false) {
  *
  * @param {import('http').ServerResponse} res
  */
-export function initSSE(res) {
+function initSSE(res) {
   res.writeHead(200, {
     "Content-Type": "text/event-stream",
     "Cache-Control": "no-cache",
@@ -27,3 +27,5 @@ export function initSSE(res) {
   });
   res.write("\n");
 }
+
+module.exports = { sendSSE, initSSE };
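
A minimal sketch of wiring these helpers into an Express handler, mirroring how streamingHandler.js uses them above; the route and payload are illustrative:

// Hypothetical endpoint built on initSSE/sendSSE (CommonJS, 1.0.7).
const express = require("express");
const { initSSE, sendSSE } = require("./src/utils/sseStream.js");

const app = express();
app.get("/demo-stream", (req, res) => {
  initSSE(res);                       // writes the text/event-stream headers
  sendSSE(res, { token: "hello" });   // a single data: frame
  sendSSE(res, { done: true }, true); // final frame plus the end event
  res.end();                          // sendSSE does not close the response itself
});
app.listen(3000);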