@o-lang/semantic-doc-search 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +4 -44
- package/package.json +1 -1
- package/src/embeddings/anthropic.js +4 -3
- package/src/embeddings/groq.js +4 -3
- package/src/embeddings/local.js +6 -3
- package/src/embeddings/openai.js +4 -3
- package/src/llm/anthropic.js +4 -2
- package/src/llm/groq.js +8 -5
- package/src/llm/openai.js +4 -2
- package/src/llm/router.js +7 -4
- package/src/rerank/cohere.js +4 -3
- package/src/rerank/groqRerank.js +4 -3
- package/src/rerank/local.js +4 -3
- package/src/server/streamingHandler.js +15 -8
- package/src/utils/chunker.js +8 -19
- package/src/utils/extractText.js +10 -8
- package/src/utils/fileLoader.js +6 -5
- package/src/utils/highlight.js +13 -22
- package/src/utils/similarity.js +4 -2
- package/src/utils/sseStream.js +4 -2
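Note: nearly every change below follows the same pattern — the modules appear to have been converted from ES-module syntax (import / export) to CommonJS (require / module.exports), with each file gaining an explicit export at the end. A minimal before/after sketch of that pattern, condensed from the embeddings diffs below (the class body is elided and purely illustrative):

// 1.0.6 (ES modules)
// import OpenAI from "openai";
// export class OpenAIEmbedding { ... }

// 1.0.8 (CommonJS)
const OpenAI = require("openai");

class OpenAIEmbedding {
  constructor(apiKey = process.env.OPENAI_API_KEY) {
    if (!apiKey) throw new Error("Missing OPENAI_API_KEY");
    this.client = new OpenAI({ apiKey });
  }
  // ...embedding methods unchanged...
}

module.exports = { OpenAIEmbedding };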
package/bin/cli.js
CHANGED
@@ -1,7 +1,7 @@
  #!/usr/bin/env node
-
-
-
+ const yargs = require("yargs");
+ const { hideBin } = require("yargs/helpers");
+ const resolver = require("../src/index.js");

  const argv = yargs(hideBin(process.argv))
    .usage("Usage: $0 <query> [options]")
@@ -48,44 +48,4 @@ const context = {
  } catch (err) {
    console.error("\n❌ Error running search:", err);
  }
- })();
-
-
- // console.error("❌ Please provide a query string.");
- // process.exit(1);
- // }
-
- // const stream = argv.stream || false;
- // const provider = argv.provider || "openai";
-
- // // Resolve doc_root if given
- // const doc_root = argv.doc_root
- //   ? path.resolve(__dirname, "..", argv.doc_root)
- //   : undefined;
-
- // // Optional runtime API keys (users pass env variables)
- // const openaiApiKey = process.env.OPENAI_API_KEY;
- // const groqApiKey = process.env.GROQ_API_KEY;
- // const anthropicApiKey = process.env.ANTHROPIC_API_KEY;
-
- // (async () => {
- //   try {
- //     const result = await resolver("search", {
- //       query,
- //       stream,
- //       doc_root,
- //       options: { provider, openaiApiKey, groqApiKey, anthropicApiKey },
- //       onToken: token => {
- //         if (stream) process.stdout.write(token);
- //       },
- //     });
-
- //     if (!stream) {
- //       console.log("\n\n✅ Result:\n");
- //       console.log(result.text);
- //       console.log("\nMeta:", result.meta);
- //     }
- //   } catch (err) {
- //     console.error("❌ Error running search:", err);
- //   }
- // })();
+ })();

package/src/embeddings/anthropic.js
CHANGED
@@ -1,12 +1,11 @@
  // src/embeddings/anthropic.js
-
- import Anthropic from "@anthropic-ai/sdk";
+ const Anthropic = require("@anthropic-ai/sdk");

  /**
   * AnthropicEmbedding
   * Generates vector embeddings using Claude embeddings API.
   */
-
+ class AnthropicEmbedding {
    constructor(apiKey = process.env.ANTHROPIC_API_KEY) {
      if (!apiKey) throw new Error("Missing ANTHROPIC_API_KEY");
      this.client = new Anthropic({ apiKey });
@@ -43,3 +42,5 @@ export class AnthropicEmbedding {
      return res.data.map(item => item.embedding);
    }
  }
+
+ module.exports = { AnthropicEmbedding };

package/src/embeddings/groq.js
CHANGED
@@ -1,6 +1,5 @@
  // src/embeddings/groq.js
-
- import Groq from "groq-sdk";
+ const Groq = require("groq-sdk");

  /**
   * GroqEmbedding
@@ -8,7 +7,7 @@ import Groq from "groq-sdk";
   *
   * Default model: nomic-embed-text
   */
-
+ class GroqEmbedding {
    constructor(apiKey = process.env.GROQ_API_KEY) {
      if (!apiKey) throw new Error("Missing GROQ_API_KEY");
      this.client = new Groq({ apiKey });
@@ -45,3 +44,5 @@ export class GroqEmbedding {
      return res.data.map(item => item.embedding);
    }
  }
+
+ module.exports = { GroqEmbedding };

package/src/embeddings/local.js
CHANGED
@@ -1,4 +1,5 @@
-
+ // src/embeddings/local.js
+ const crypto = require("crypto");

  /**
   * LocalEmbedding
@@ -6,7 +7,7 @@ import crypto from "crypto";
   * Each string will produce a consistent vector based on a hash.
   * Note: Not semantic, just a placeholder for testing.
   */
-
+ class LocalEmbedding {
    constructor(dim = 512) {
      this.dim = dim;
    }
@@ -40,7 +41,7 @@ export class LocalEmbedding {
   * Convenience function for index.js
   * Retries local embedding generation (mostly placeholder, but keeps API compatible)
   */
-
+ async function createEmbeddingWithRetry(text, options = {}, retries = 1) {
    const embedder = new LocalEmbedding();
    for (let attempt = 1; attempt <= retries; attempt++) {
      try {
@@ -50,3 +51,5 @@ export async function createEmbeddingWithRetry(text, options = {}, retries = 1)
      }
    }
  }
+
+ module.exports = { LocalEmbedding, createEmbeddingWithRetry };

package/src/embeddings/openai.js
CHANGED
@@ -1,6 +1,5 @@
  // src/embeddings/openai.js
-
- import OpenAI from "openai";
+ const OpenAI = require("openai");

  /**
   * OpenAIEmbedding
@@ -8,7 +7,7 @@ import OpenAI from "openai";
   *
   * Default model: text-embedding-3-large (best for RAG)
   */
-
+ class OpenAIEmbedding {
    constructor(apiKey = process.env.OPENAI_API_KEY) {
      if (!apiKey) throw new Error("Missing OPENAI_API_KEY");
      this.client = new OpenAI({ apiKey });
@@ -45,3 +44,5 @@ export class OpenAIEmbedding {
      return res.data.map(item => item.embedding);
    }
  }
+
+ module.exports = { OpenAIEmbedding };

package/src/llm/anthropic.js
CHANGED
@@ -1,10 +1,10 @@
  // src/llm/anthropic.js
-
+ const Anthropic = require("@anthropic-ai/sdk");

  /**
   * Anthropic LLM Provider
   */
-
+ class AnthropicProvider {
    constructor({ apiKey }) {
      if (!apiKey) console.warn("⚠️ WARNING: ANTHROPIC_API_KEY missing");
      this.client = new Anthropic({ apiKey });
@@ -34,3 +34,5 @@ export default class AnthropicProvider {
      return { done: true };
    }
  }
+
+ module.exports = AnthropicProvider;

package/src/llm/groq.js
CHANGED
@@ -1,10 +1,11 @@
  // src/llm/groq.js
-
- // Multi-tenant safe, streaming-ready
+ const Groq = require("groq-sdk");

-
-
-
+ /**
+  * Groq LLM Provider (groq-sdk@0.5.0)
+  * Multi-tenant safe, streaming-ready
+  */
+ class GroqProvider {
    /**
     * @param {Object} options
     * @param {string} options.apiKey - User-provided Groq API key
@@ -50,3 +51,5 @@ export default class GroqProvider {
      return { done: true };
    }
  }
+
+ module.exports = GroqProvider;

package/src/llm/openai.js
CHANGED
@@ -1,11 +1,11 @@
  // src/llm/openai.js
-
+ const OpenAI = require("openai");

  /**
   * OpenAI LLM Provider
   * Supports GPT-4, GPT-4o-mini, GPT-4o, and any Chat model
   */
-
+ class OpenAIProvider {
    constructor({ apiKey }) {
      if (!apiKey) console.warn("⚠️ WARNING: OPENAI_API_KEY missing");
      this.client = new OpenAI({ apiKey });
@@ -41,3 +41,5 @@ export default class OpenAIProvider {
      return { done: true };
    }
  }
+
+ module.exports = OpenAIProvider;

package/src/llm/router.js
CHANGED
@@ -1,8 +1,9 @@
-
-
-
+ // src/llm/router.js
+ const OpenAIProvider = require("./openai.js");
+ const GroqProvider = require("./groq.js");
+ const AnthropicProvider = require("./anthropic.js");

-
+ function createLLM({ provider, openaiApiKey, groqApiKey, anthropicApiKey } = {}) {
    switch ((provider || "").toLowerCase()) {
      case "openai":
        if (!openaiApiKey) throw new Error("Missing OpenAI API key");
@@ -20,3 +21,5 @@ export function createLLM({ provider, openaiApiKey, groqApiKey, anthropicApiKey
      throw new Error(`Unsupported LLM provider: ${provider}`);
    }
  }
+
+ module.exports = { createLLM };

package/src/rerank/cohere.js
CHANGED
@@ -1,12 +1,11 @@
  // src/rerank/cohere.js
-
- import Cohere from "cohere-ai";
+ const Cohere = require("cohere-ai");

  /**
   * CohereReranker
   * Uses Cohere Rerank API to reorder candidate documents/snippets
   */
-
+ class CohereReranker {
    constructor(apiKey = process.env.COHERE_API_KEY) {
      if (!apiKey) throw new Error("Missing COHERE_API_KEY");
      Cohere.init(apiKey);
@@ -37,3 +36,5 @@ export class CohereReranker {
      return ranked;
    }
  }
+
+ module.exports = { CohereReranker };

package/src/rerank/groqRerank.js
CHANGED
@@ -1,12 +1,11 @@
  // src/rerank/groqRerank.js
-
- import Groq from "groq-sdk";
+ const Groq = require("groq-sdk");

  /**
   * GroqReranker
   * Uses Groq LLMs to rerank candidate documents/snippets given a query
   */
-
+ class GroqReranker {
    constructor(apiKey = process.env.GROQ_API_KEY) {
      if (!apiKey) throw new Error("Missing GROQ_API_KEY");
      this.client = new Groq({ apiKey });
@@ -48,3 +47,5 @@ Return JSON array: [{"text": "...", "score": 0.95}, ...]
    }
  }
  }
+
+ module.exports = { GroqReranker };

package/src/rerank/local.js
CHANGED
@@ -1,12 +1,11 @@
  // src/rerank/local.js
-
- import { cosine } from "../utils/similarity.js";
+ const { cosine } = require("../utils/similarity.js");

  /**
   * LocalReranker
   * Simple fallback reranker using keyword overlap + cosine similarity
   */
-
+ class LocalReranker {
    constructor() {}

    /**
@@ -41,3 +40,5 @@ export class LocalReranker {
      return results;
    }
  }
+
+ module.exports = { LocalReranker };

package/src/server/streamingHandler.js
CHANGED
@@ -1,8 +1,7 @@
  // src/server/streamingHandler.js
-
-
-
- import { llmRouter } from "../llm/router.js";
+ const express = require("express");
+ const { initSSE, sendSSE } = require("../utils/sseStream.js");
+ const { createLLM } = require("../llm/router.js");

  const router = express.Router();

@@ -18,11 +17,19 @@ router.post("/stream", async (req, res) => {
    // Initialize SSE
    initSSE(res);

-   // LLM
-
-
+   // Create LLM instance
+   const llm = createLLM({
+     provider,
+     openaiApiKey: options?.openaiApiKey,
+     groqApiKey: options?.groqApiKey,
+     anthropicApiKey: options?.anthropicApiKey
    });

+   // Stream the query response
+   await llm.stream({ prompt: query, onToken: (token) => {
+     sendSSE(res, { token });
+   }});
+
    // End stream
    sendSSE(res, { done: true }, true);

@@ -32,4 +39,4 @@ router.post("/stream", async (req, res) => {
    }
  });

-
+ module.exports = { router: router };

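Since the streaming handler now exports its Express router under the router key, a consumer would wire it up roughly as below. This is only a sketch under assumptions: the mount path, the port, and the deep require path into the package are illustrative and not defined by the diff.

const express = require("express");
// assumed deep import path; adjust to however the package actually exposes the handler
const { router: streamRouter } = require("@o-lang/semantic-doc-search/src/server/streamingHandler.js");

const app = express();
app.use(express.json());
app.use("/search", streamRouter); // POST /search/stream then emits SSE token events
app.listen(3000);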
package/src/utils/chunker.js
CHANGED
@@ -1,27 +1,16 @@
  // src/utils/chunker.js
-
-
- * chunkText
- * Split text into overlapping chunks
- *
- * @param {string} text - the full text
- * @param {number} chunkSize - number of characters per chunk
- * @param {number} overlap - number of characters overlap between chunks
- * @returns string[] - array of text chunks
- */
- export function chunkText(text, chunkSize = 1000, overlap = 200) {
-   if (!text || !text.trim()) return [];
-
+ function chunkText(text, chunkSize, overlap) {
+   if (!text) return [];
    const chunks = [];
    let start = 0;
-
+
    while (start < text.length) {
      const end = Math.min(start + chunkSize, text.length);
-
-
-
-     start += chunkSize - overlap; // move forward with overlap
+     chunks.push(text.slice(start, end));
+     start += chunkSize - overlap;
    }
-
+
    return chunks;
  }
+
+ module.exports = { chunkText };

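Worth noting: the rewritten chunkText drops the old default parameters (chunkSize = 1000, overlap = 200) and the trim check, so callers now have to pass both sizes explicitly. A small usage sketch (the input variable is illustrative):

const { chunkText } = require("./src/utils/chunker.js");

// 1000-character windows overlapping by 200 characters,
// matching the defaults the 1.0.6 signature used to provide.
const chunks = chunkText(longDocumentText, 1000, 200);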
package/src/utils/extractText.js
CHANGED
@@ -1,8 +1,8 @@
-
-
-
-
-
+ // src/utils/extractText.js
+ const fs = require("fs");
+ const path = require("path");
+ const { JSDOM } = require("jsdom");
+ const pdfParse = require("pdf-parse");

  /**
   * extractTextFromFile
@@ -13,7 +13,7 @@ import { readFile } from "fs/promises";
   * - .pdf
   * - .docx (minimal)
   */
-
+ async function extractTextFromFile(filePath) {
    const ext = path.extname(filePath).toLowerCase();

    if (ext === ".txt" || ext === ".md") {
@@ -34,7 +34,7 @@ export async function extractTextFromFile(filePath) {

    if (ext === ".docx") {
      // Minimal extraction using ZIP (can be improved)
-     const
+     const StreamZip = require("node-stream-zip").default;
      const zip = new StreamZip.async({ file: filePath });
      const content = await zip.entryData("word/document.xml");
      await zip.close();
@@ -49,7 +49,7 @@ export async function extractTextFromFile(filePath) {
   * Basic keyword extraction for lexical matching
   * Returns array of lowercase words, stripped of punctuation
   */
-
+ function extractKeywords(text = "") {
    if (!text) return [];
    return text
      .toLowerCase()
@@ -57,3 +57,5 @@ export function extractKeywords(text = "") {
      .split(/\s+/)
      .filter(Boolean);
  }
+
+ module.exports = { extractTextFromFile, extractKeywords };

package/src/utils/fileLoader.js
CHANGED
@@ -1,8 +1,7 @@
  // src/utils/fileLoader.js
-
-
-
- import { extractTextFromFile } from "./extractText.js";
+ const fs = require("fs");
+ const path = require("path");
+ const { extractTextFromFile } = require("./extractText.js");

  /**
   * loadDocuments
@@ -13,7 +12,7 @@ import { extractTextFromFile } from "./extractText.js";
   * @param {string[]} exts - array of supported file extensions
   * @returns {Promise<Array<{ filePath: string, text: string }>>}
   */
-
+ async function loadDocuments(dirPath, exts = [".txt", ".md", ".pdf", ".html", ".docx"]) {
    if (!fs.existsSync(dirPath)) return [];

    const files = fs.readdirSync(dirPath, { withFileTypes: true });
@@ -37,3 +36,5 @@ export async function loadDocuments(dirPath, exts = [".txt", ".md", ".pdf", ".ht

    return docs;
  }
+
+ module.exports = { loadDocuments };

package/src/utils/highlight.js
CHANGED
@@ -1,24 +1,15 @@
  // src/utils/highlight.js
-
-
-
-
-
-
-
-
-
-
-
-   let highlighted = text;
-
-   // escape special regex chars in keywords
-   const escapedKeywords = keywords.map(k => k.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
-
-   const pattern = new RegExp(`\\b(${escapedKeywords.join("|")})\\b`, "gi");
-
-   highlighted = highlighted.replace(pattern, "<mark>$1</mark>");
-
-   return highlighted;
+ function highlightMatches(text, keywords) {
+   if (!text || !keywords || !keywords.length) return text;
+
+   let result = text;
+   keywords.forEach(keyword => {
+     if (!keyword) return;
+     const regex = new RegExp(`(${keyword})`, 'gi');
+     result = result.replace(regex, '**$1**');
+   });
+
+   return result;
  }
+
+ module.exports = { highlightMatches };

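The new highlighter wraps matches in **...** (Markdown-style emphasis) instead of <mark> tags, and unlike the 1.0.6 version it no longer escapes regex metacharacters or anchors on word boundaries, so plain single-word keywords are the intended input. A quick sketch of the new behaviour:

const { highlightMatches } = require("./src/utils/highlight.js");

highlightMatches("Semantic search over local docs", ["search", "docs"]);
// → "Semantic **search** over local **docs**"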
package/src/utils/similarity.js
CHANGED
@@ -7,7 +7,7 @@
   * @param {number[]} b
   * @returns {number} similarity score between -1 and 1
   */
-
+ function cosine(a, b) {
    if (!a || !b || a.length !== b.length) return 0;

    let dot = 0, magA = 0, magB = 0;
@@ -30,7+30,7 @@ export function cosine(a, b) {
   * @param {number[]} b
   * @returns {number} Euclidean distance
   */
-
+ function euclidean(a, b) {
    if (!a || !b || a.length !== b.length) return Infinity;

    let sum = 0;
@@ -40,3 +40,5 @@ export function euclidean(a, b) {

    return Math.sqrt(sum);
  }
+
+ module.exports = { cosine, euclidean };

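The similarity helpers keep their signatures and only gain the CommonJS export; for reference, with toy vectors they behave like this (assuming the standard cosine and Euclidean formulas the docblocks describe):

const { cosine, euclidean } = require("./src/utils/similarity.js");

cosine([1, 0], [1, 0]);    // 1  (same direction)
cosine([1, 0], [0, 1]);    // 0  (orthogonal)
euclidean([0, 0], [3, 4]); // 5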
package/src/utils/sseStream.js
CHANGED
@@ -8,7 +8,7 @@
   * @param {string} data
   * @param {boolean} [end=false] - whether this is the final message
   */
-
+ function sendSSE(res, data, end = false) {
    res.write(`data: ${JSON.stringify(data)}\n\n`);
    if (end) res.write("event: end\ndata: [DONE]\n\n");
  }
@@ -19,7 +19,7 @@ export function sendSSE(res, data, end = false) {
   *
   * @param {import('http').ServerResponse} res
   */
-
+ function initSSE(res) {
    res.writeHead(200, {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
@@ -27,3 +27,5 @@ export function initSSE(res) {
    });
    res.write("\n");
  }
+
+ module.exports = { sendSSE, initSSE };