@o-lang/semantic-doc-search 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/cli.js CHANGED
@@ -1,7 +1,7 @@
  #!/usr/bin/env node
- import yargs from "yargs";
- import { hideBin } from "yargs/helpers";
- import resolver from "../src/index.js";
+ const yargs = require("yargs");
+ const { hideBin } = require("yargs/helpers");
+ const resolver = require("../src/index.js");
 
  const argv = yargs(hideBin(process.argv))
  .usage("Usage: $0 <query> [options]")
@@ -48,44 +48,4 @@ const context = {
  } catch (err) {
  console.error("\n❌ Error running search:", err);
  }
- })();
-
-
- // console.error("❌ Please provide a query string.");
- // process.exit(1);
- // }
-
- // const stream = argv.stream || false;
- // const provider = argv.provider || "openai";
-
- // // Resolve doc_root if given
- // const doc_root = argv.doc_root
- // ? path.resolve(__dirname, "..", argv.doc_root)
- // : undefined;
-
- // // Optional runtime API keys (users pass env variables)
- // const openaiApiKey = process.env.OPENAI_API_KEY;
- // const groqApiKey = process.env.GROQ_API_KEY;
- // const anthropicApiKey = process.env.ANTHROPIC_API_KEY;
-
- // (async () => {
- // try {
- // const result = await resolver("search", {
- // query,
- // stream,
- // doc_root,
- // options: { provider, openaiApiKey, groqApiKey, anthropicApiKey },
- // onToken: token => {
- // if (stream) process.stdout.write(token);
- // },
- // });
-
- // if (!stream) {
- // console.log("\n\n✅ Result:\n");
- // console.log(result.text);
- // console.log("\nMeta:", result.meta);
- // }
- // } catch (err) {
- // console.error("❌ Error running search:", err);
- // }
- // })();
+ })();
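The CLI now loads the resolver with require() instead of import. A minimal sketch of the equivalent programmatic call, inferred from the commented-out block removed above; the actual export and signature of src/index.js are not shown in this diff, so treat the shape as an assumption:

    // Sketch only: resolver API inferred from bin/cli.js; names may differ in src/index.js.
    const resolver = require("@o-lang/semantic-doc-search");

    (async () => {
      const result = await resolver("search", {
        query: "how does hybrid search work?",
        stream: false,
        options: { provider: "openai", openaiApiKey: process.env.OPENAI_API_KEY },
      });
      console.log(result.text);
      console.log(result.meta);
    })();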
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@o-lang/semantic-doc-search",
- "version": "1.0.6",
+ "version": "1.0.8",
  "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
  "main": "src/index.js",
  "type": "commonjs",
package/src/embeddings/anthropic.js CHANGED
@@ -1,12 +1,11 @@
  // src/embeddings/anthropic.js
-
- import Anthropic from "@anthropic-ai/sdk";
+ const Anthropic = require("@anthropic-ai/sdk");
 
  /**
  * AnthropicEmbedding
  * Generates vector embeddings using Claude embeddings API.
  */
- export class AnthropicEmbedding {
+ class AnthropicEmbedding {
  constructor(apiKey = process.env.ANTHROPIC_API_KEY) {
  if (!apiKey) throw new Error("Missing ANTHROPIC_API_KEY");
  this.client = new Anthropic({ apiKey });
@@ -43,3 +42,5 @@ export class AnthropicEmbedding {
  return res.data.map(item => item.embedding);
  }
  }
+
+ module.exports = { AnthropicEmbedding };
package/src/embeddings/groq.js CHANGED
@@ -1,6 +1,5 @@
  // src/embeddings/groq.js
-
- import Groq from "groq-sdk";
+ const Groq = require("groq-sdk");
 
  /**
  * GroqEmbedding
@@ -8,7 +7,7 @@ import Groq from "groq-sdk";
  *
  * Default model: nomic-embed-text
  */
- export class GroqEmbedding {
+ class GroqEmbedding {
  constructor(apiKey = process.env.GROQ_API_KEY) {
  if (!apiKey) throw new Error("Missing GROQ_API_KEY");
  this.client = new Groq({ apiKey });
@@ -45,3 +44,5 @@ export class GroqEmbedding {
  return res.data.map(item => item.embedding);
  }
  }
+
+ module.exports = { GroqEmbedding };
package/src/embeddings/local.js CHANGED
@@ -1,4 +1,5 @@
- import crypto from "crypto";
+ // src/embeddings/local.js
+ const crypto = require("crypto");
 
  /**
  * LocalEmbedding
@@ -6,7 +7,7 @@ import crypto from "crypto";
  * Each string will produce a consistent vector based on a hash.
  * Note: Not semantic, just a placeholder for testing.
  */
- export class LocalEmbedding {
+ class LocalEmbedding {
  constructor(dim = 512) {
  this.dim = dim;
  }
@@ -40,7 +41,7 @@ export class LocalEmbedding {
  * Convenience function for index.js
  * Retries local embedding generation (mostly placeholder, but keeps API compatible)
  */
- export async function createEmbeddingWithRetry(text, options = {}, retries = 1) {
+ async function createEmbeddingWithRetry(text, options = {}, retries = 1) {
  const embedder = new LocalEmbedding();
  for (let attempt = 1; attempt <= retries; attempt++) {
  try {
@@ -50,3 +51,5 @@ export async function createEmbeddingWithRetry(text, options = {}, retries = 1)
  }
  }
  }
+
+ module.exports = { LocalEmbedding, createEmbeddingWithRetry };
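With the new CommonJS export, the local placeholder embedder is pulled in with require(). A minimal sketch, assuming createEmbeddingWithRetry resolves to a fixed-size numeric vector (its return value is not visible in this diff):

    // Sketch only: return shape assumed, not confirmed by the diff.
    const { createEmbeddingWithRetry } = require("./src/embeddings/local.js");

    (async () => {
      const vector = await createEmbeddingWithRetry("hello world", {}, 3);
      console.log(Array.isArray(vector), vector.length); // expected: a numeric array of length 512 by default
    })();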
package/src/embeddings/openai.js CHANGED
@@ -1,6 +1,5 @@
  // src/embeddings/openai.js
-
- import OpenAI from "openai";
+ const OpenAI = require("openai");
 
  /**
  * OpenAIEmbedding
@@ -8,7 +7,7 @@ import OpenAI from "openai";
  *
  * Default model: text-embedding-3-large (best for RAG)
  */
- export class OpenAIEmbedding {
+ class OpenAIEmbedding {
  constructor(apiKey = process.env.OPENAI_API_KEY) {
  if (!apiKey) throw new Error("Missing OPENAI_API_KEY");
  this.client = new OpenAI({ apiKey });
@@ -45,3 +44,5 @@ export class OpenAIEmbedding {
  return res.data.map(item => item.embedding);
  }
  }
+
+ module.exports = { OpenAIEmbedding };
package/src/llm/anthropic.js CHANGED
@@ -1,10 +1,10 @@
  // src/llm/anthropic.js
- import Anthropic from "@anthropic-ai/sdk";
+ const Anthropic = require("@anthropic-ai/sdk");
 
  /**
  * Anthropic LLM Provider
  */
- export default class AnthropicProvider {
+ class AnthropicProvider {
  constructor({ apiKey }) {
  if (!apiKey) console.warn("⚠️ WARNING: ANTHROPIC_API_KEY missing");
  this.client = new Anthropic({ apiKey });
@@ -34,3 +34,5 @@ export default class AnthropicProvider {
  return { done: true };
  }
  }
+
+ module.exports = AnthropicProvider;
package/src/llm/groq.js CHANGED
@@ -1,10 +1,11 @@
  // src/llm/groq.js
- // Groq LLM Provider (groq-sdk@0.5.0)
- // Multi-tenant safe, streaming-ready
+ const Groq = require("groq-sdk");
 
- import Groq from "groq-sdk";
-
- export default class GroqProvider {
+ /**
+ * Groq LLM Provider (groq-sdk@0.5.0)
+ * Multi-tenant safe, streaming-ready
+ */
+ class GroqProvider {
  /**
  * @param {Object} options
  * @param {string} options.apiKey - User-provided Groq API key
@@ -50,3 +51,5 @@ export default class GroqProvider {
  return { done: true };
  }
  }
+
+ module.exports = GroqProvider;
package/src/llm/openai.js CHANGED
@@ -1,11 +1,11 @@
  // src/llm/openai.js
- import OpenAI from "openai";
+ const OpenAI = require("openai");
 
  /**
  * OpenAI LLM Provider
  * Supports GPT-4, GPT-4o-mini, GPT-4o, and any Chat model
  */
- export default class OpenAIProvider {
+ class OpenAIProvider {
  constructor({ apiKey }) {
  if (!apiKey) console.warn("⚠️ WARNING: OPENAI_API_KEY missing");
  this.client = new OpenAI({ apiKey });
@@ -41,3 +41,5 @@ export default class OpenAIProvider {
  return { done: true };
  }
  }
+
+ module.exports = OpenAIProvider;
package/src/llm/router.js CHANGED
@@ -1,8 +1,9 @@
- import OpenAIProvider from "./openai.js";
- import GroqProvider from "./groq.js";
- import AnthropicProvider from "./anthropic.js";
+ // src/llm/router.js
+ const OpenAIProvider = require("./openai.js");
+ const GroqProvider = require("./groq.js");
+ const AnthropicProvider = require("./anthropic.js");
 
- export function createLLM({ provider, openaiApiKey, groqApiKey, anthropicApiKey } = {}) {
+ function createLLM({ provider, openaiApiKey, groqApiKey, anthropicApiKey } = {}) {
  switch ((provider || "").toLowerCase()) {
  case "openai":
  if (!openaiApiKey) throw new Error("Missing OpenAI API key");
@@ -20,3 +21,5 @@ export function createLLM({ provider, openaiApiKey, groqApiKey, anthropicApiKey
  throw new Error(`Unsupported LLM provider: ${provider}`);
  }
  }
+
+ module.exports = { createLLM };
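The router now exposes createLLM through module.exports instead of a named ESM export. A minimal consumption sketch; the stream({ prompt, onToken }) shape is taken from the streamingHandler.js change later in this diff and is otherwise an assumption:

    // Sketch only: provider streaming interface assumed from src/server/streamingHandler.js.
    const { createLLM } = require("./src/llm/router.js");

    const llm = createLLM({ provider: "groq", groqApiKey: process.env.GROQ_API_KEY });

    (async () => {
      await llm.stream({
        prompt: "Summarize the project README.",
        onToken: (token) => process.stdout.write(token),
      });
    })();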
package/src/rerank/cohere.js CHANGED
@@ -1,12 +1,11 @@
  // src/rerank/cohere.js
-
- import Cohere from "cohere-ai";
+ const Cohere = require("cohere-ai");
 
  /**
  * CohereReranker
  * Uses Cohere Rerank API to reorder candidate documents/snippets
  */
- export class CohereReranker {
+ class CohereReranker {
  constructor(apiKey = process.env.COHERE_API_KEY) {
  if (!apiKey) throw new Error("Missing COHERE_API_KEY");
  Cohere.init(apiKey);
@@ -37,3 +36,5 @@ export class CohereReranker {
  return ranked;
  }
  }
+
+ module.exports = { CohereReranker };
package/src/rerank/groqRerank.js CHANGED
@@ -1,12 +1,11 @@
  // src/rerank/groqRerank.js
-
- import Groq from "groq-sdk";
+ const Groq = require("groq-sdk");
 
  /**
  * GroqReranker
  * Uses Groq LLMs to rerank candidate documents/snippets given a query
  */
- export class GroqReranker {
+ class GroqReranker {
  constructor(apiKey = process.env.GROQ_API_KEY) {
  if (!apiKey) throw new Error("Missing GROQ_API_KEY");
  this.client = new Groq({ apiKey });
@@ -48,3 +47,5 @@ Return JSON array: [{"text": "...", "score": 0.95}, ...]
  }
  }
  }
+
+ module.exports = { GroqReranker };
package/src/rerank/local.js CHANGED
@@ -1,12 +1,11 @@
  // src/rerank/local.js
-
- import { cosine } from "../utils/similarity.js";
+ const { cosine } = require("../utils/similarity.js");
 
  /**
  * LocalReranker
  * Simple fallback reranker using keyword overlap + cosine similarity
  */
- export class LocalReranker {
+ class LocalReranker {
  constructor() {}
 
  /**
@@ -41,3 +40,5 @@ export class LocalReranker {
  return results;
  }
  }
+
+ module.exports = { LocalReranker };
package/src/server/streamingHandler.js CHANGED
@@ -1,8 +1,7 @@
  // src/server/streamingHandler.js
-
- import express from "express";
- import { initSSE, sendSSE } from "../utils/sseStream.js";
- import { llmRouter } from "../llm/router.js";
+ const express = require("express");
+ const { initSSE, sendSSE } = require("../utils/sseStream.js");
+ const { createLLM } = require("../llm/router.js");
 
  const router = express.Router();
 
@@ -18,11 +17,19 @@ router.post("/stream", async (req, res) => {
  // Initialize SSE
  initSSE(res);
 
- // LLM router handles streaming token-by-token
- await llmRouter.streamQuery(query, provider, options, (token) => {
- sendSSE(res, { token });
+ // Create LLM instance
+ const llm = createLLM({
+ provider,
+ openaiApiKey: options?.openaiApiKey,
+ groqApiKey: options?.groqApiKey,
+ anthropicApiKey: options?.anthropicApiKey
  });
 
+ // Stream the query response
+ await llm.stream({ prompt: query, onToken: (token) => {
+ sendSSE(res, { token });
+ }});
+
  // End stream
  sendSSE(res, { done: true }, true);
 
@@ -32,4 +39,4 @@
  }
  });
 
- export { router as streamingHandler };
+ module.exports = { router: router };
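The handler's export shape changed from a named ESM export (streamingHandler) to module.exports = { router }. A minimal mounting sketch under CommonJS; the mount path and port are illustrative assumptions, not values from the package:

    // Sketch only: "/api" and port 3000 are assumptions.
    const express = require("express");
    const { router } = require("./src/server/streamingHandler.js");

    const app = express();
    app.use(express.json());
    app.use("/api", router); // POST /api/stream responds with an SSE token stream
    app.listen(3000);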
package/src/utils/chunker.js CHANGED
@@ -1,27 +1,16 @@
  // src/utils/chunker.js
-
- /**
- * chunkText
- * Split text into overlapping chunks
- *
- * @param {string} text - the full text
- * @param {number} chunkSize - number of characters per chunk
- * @param {number} overlap - number of characters overlap between chunks
- * @returns string[] - array of text chunks
- */
- export function chunkText(text, chunkSize = 1000, overlap = 200) {
- if (!text || !text.trim()) return [];
-
+ function chunkText(text, chunkSize, overlap) {
+ if (!text) return [];
  const chunks = [];
  let start = 0;
-
+
  while (start < text.length) {
  const end = Math.min(start + chunkSize, text.length);
- const chunk = text.slice(start, end);
- chunks.push(chunk);
-
- start += chunkSize - overlap; // move forward with overlap
+ chunks.push(text.slice(start, end));
+ start += chunkSize - overlap;
  }
-
+
  return chunks;
  }
+
+ module.exports = { chunkText };
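The rewritten chunkText drops its default chunkSize and overlap values, so callers must pass both explicitly and keep overlap smaller than chunkSize so start keeps advancing. A small worked example against the new implementation:

    // chunkText walks the string in steps of chunkSize - overlap (here 4 - 1 = 3).
    const { chunkText } = require("./src/utils/chunker.js");
    console.log(chunkText("abcdefghij", 4, 1));
    // → ["abcd", "defg", "ghij", "j"]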
package/src/utils/extractText.js CHANGED
@@ -1,8 +1,8 @@
- import fs from "fs";
- import path from "path";
- import { JSDOM } from "jsdom";
- import pdfParse from "pdf-parse";
- import { readFile } from "fs/promises";
+ // src/utils/extractText.js
+ const fs = require("fs");
+ const path = require("path");
+ const { JSDOM } = require("jsdom");
+ const pdfParse = require("pdf-parse");
 
  /**
  * extractTextFromFile
@@ -13,7 +13,7 @@ import { readFile } from "fs/promises";
  * - .pdf
  * - .docx (minimal)
  */
- export async function extractTextFromFile(filePath) {
+ async function extractTextFromFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();
 
  if (ext === ".txt" || ext === ".md") {
@@ -34,7 +34,7 @@ export async function extractTextFromFile(filePath) {
 
  if (ext === ".docx") {
  // Minimal extraction using ZIP (can be improved)
- const { default: StreamZip } = await import("node-stream-zip");
+ const StreamZip = require("node-stream-zip").default;
  const zip = new StreamZip.async({ file: filePath });
  const content = await zip.entryData("word/document.xml");
  await zip.close();
@@ -49,7 +49,7 @@ export async function extractTextFromFile(filePath) {
  * Basic keyword extraction for lexical matching
  * Returns array of lowercase words, stripped of punctuation
  */
- export function extractKeywords(text = "") {
+ function extractKeywords(text = "") {
  if (!text) return [];
  return text
  .toLowerCase()
@@ -57,3 +57,5 @@ export function extractKeywords(text = "") {
  .split(/\s+/)
  .filter(Boolean);
  }
+
+ module.exports = { extractTextFromFile, extractKeywords };
package/src/utils/fileLoader.js CHANGED
@@ -1,8 +1,7 @@
  // src/utils/fileLoader.js
-
- import fs from "fs";
- import path from "path";
- import { extractTextFromFile } from "./extractText.js";
+ const fs = require("fs");
+ const path = require("path");
+ const { extractTextFromFile } = require("./extractText.js");
 
  /**
  * loadDocuments
@@ -13,7 +12,7 @@ import { extractTextFromFile } from "./extractText.js";
  * @param {string[]} exts - array of supported file extensions
  * @returns {Promise<Array<{ filePath: string, text: string }>>}
  */
- export async function loadDocuments(dirPath, exts = [".txt", ".md", ".pdf", ".html", ".docx"]) {
+ async function loadDocuments(dirPath, exts = [".txt", ".md", ".pdf", ".html", ".docx"]) {
  if (!fs.existsSync(dirPath)) return [];
 
  const files = fs.readdirSync(dirPath, { withFileTypes: true });
@@ -37,3 +36,5 @@ export async function loadDocuments(dirPath, exts = [".txt", ".md", ".pdf", ".ht
 
  return docs;
  }
+
+ module.exports = { loadDocuments };
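loadDocuments keeps the same signature, now reached via module.exports. A minimal sketch based on the JSDoc above, which promises an array of { filePath, text } entries; the "./docs" directory is an illustrative path:

    // Sketch only: directory path is an assumption.
    const { loadDocuments } = require("./src/utils/fileLoader.js");

    (async () => {
      const docs = await loadDocuments("./docs", [".md", ".txt"]);
      for (const { filePath, text } of docs) {
        console.log(filePath, text.length);
      }
    })();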
package/src/utils/highlight.js CHANGED
@@ -1,24 +1,15 @@
  // src/utils/highlight.js
-
- /**
- * highlightMatches
- * Wraps all occurrences of keywords in <mark> tags
- *
- * @param {string} text - original text
- * @param {string[]} keywords - array of keywords to highlight
- * @returns string - HTML-safe text with <mark> highlights
- */
- export function highlightMatches(text, keywords = []) {
- if (!text || !keywords || keywords.length === 0) return text;
-
- let highlighted = text;
-
- // escape special regex chars in keywords
- const escapedKeywords = keywords.map(k => k.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
-
- const pattern = new RegExp(`\\b(${escapedKeywords.join("|")})\\b`, "gi");
-
- highlighted = highlighted.replace(pattern, "<mark>$1</mark>");
-
- return highlighted;
+ function highlightMatches(text, keywords) {
+ if (!text || !keywords || !keywords.length) return text;
+
+ let result = text;
+ keywords.forEach(keyword => {
+ if (!keyword) return;
+ const regex = new RegExp(`(${keyword})`, 'gi');
+ result = result.replace(regex, '**$1**');
+ });
+
+ return result;
  }
+
+ module.exports = { highlightMatches };
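The new implementation wraps matches in ** markers instead of <mark> tags, and interpolates each keyword directly into a RegExp (the old version escaped regex metacharacters first). A small worked example against the code above:

    // Each keyword becomes a case-insensitive global pattern.
    const { highlightMatches } = require("./src/utils/highlight.js");
    console.log(highlightMatches("Hybrid search mixes lexical and vector search", ["search"]));
    // → "Hybrid **search** mixes lexical and vector **search**"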
package/src/utils/similarity.js CHANGED
@@ -7,7 +7,7 @@
  * @param {number[]} b
  * @returns {number} similarity score between -1 and 1
  */
- export function cosine(a, b) {
+ function cosine(a, b) {
  if (!a || !b || a.length !== b.length) return 0;
 
  let dot = 0, magA = 0, magB = 0;
@@ -30,7 +30,7 @@ export function cosine(a, b) {
  * @param {number[]} b
  * @returns {number} Euclidean distance
  */
- export function euclidean(a, b) {
+ function euclidean(a, b) {
  if (!a || !b || a.length !== b.length) return Infinity;
 
  let sum = 0;
@@ -40,3 +40,5 @@ export function euclidean(a, b) {
 
  return Math.sqrt(sum);
  }
+
+ module.exports = { cosine, euclidean };
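Both helpers are now reachable through a single module.exports. A quick worked check of the expected values, assuming the standard cosine and Euclidean formulas that the doc comments describe (the loop bodies are not shown in this diff):

    // cosine([1, 2], [2, 4]) = 10 / (√5 · √20) = 1 (parallel vectors)
    // euclidean([0, 3], [4, 0]) = √(16 + 9) = 5
    const { cosine, euclidean } = require("./src/utils/similarity.js");
    console.log(cosine([1, 2], [2, 4]));    // → 1
    console.log(cosine([1, 0], [0, 1]));    // → 0 (orthogonal)
    console.log(euclidean([0, 3], [4, 0])); // → 5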
package/src/utils/sseStream.js CHANGED
@@ -8,7 +8,7 @@
  * @param {string} data
  * @param {boolean} [end=false] - whether this is the final message
  */
- export function sendSSE(res, data, end = false) {
+ function sendSSE(res, data, end = false) {
  res.write(`data: ${JSON.stringify(data)}\n\n`);
  if (end) res.write("event: end\ndata: [DONE]\n\n");
  }
@@ -19,7 +19,7 @@ export function sendSSE(res, data, end = false) {
  *
  * @param {import('http').ServerResponse} res
  */
- export function initSSE(res) {
+ function initSSE(res) {
  res.writeHead(200, {
  "Content-Type": "text/event-stream",
  "Cache-Control": "no-cache",
@@ -27,3 +27,5 @@ export function initSSE(res) {
  });
  res.write("\n");
  }
+
+ module.exports = { sendSSE, initSSE };
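For reference, the exact SSE wire format these helpers write, shown with a stub response object so the output is visible on stdout:

    // Minimal sketch: a stub res to show the bytes sendSSE writes per the code above.
    const { sendSSE } = require("./src/utils/sseStream.js");
    const res = { write: (chunk) => process.stdout.write(chunk) };
    sendSSE(res, { token: "Hel" });     // data: {"token":"Hel"}\n\n
    sendSSE(res, { done: true }, true); // data: {"done":true}\n\n then event: end\ndata: [DONE]\n\n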